From 431f6eb3570f286036bc8718a908a283f5d99473 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Sep 2018 00:08:20 -0400 Subject: SUNRPC: Add a label for RPC calls that require allocation on receive If the RPC call relies on the receive call allocating pages as buffers, then let's label it so that we a) Don't leak memory by allocating pages for requests that do not expect this behaviour b) Can optimise for the common case where calls do not require allocation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 64e4fa33d89f..d8c4c10b15f7 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1364,10 +1364,12 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, encode_nfs_fh3(xdr, args->fh); encode_uint32(xdr, args->mask); - if (args->mask & (NFS_ACL | NFS_DFACL)) + if (args->mask & (NFS_ACL | NFS_DFACL)) { prepare_reply_buffer(req, args->pages, 0, NFSACL_MAXPAGES << PAGE_SHIFT, ACL3_getaclres_sz); + req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + } } static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req, -- cgit v1.2.3 From a2791d3a2cee9432e78bbdcfde586ef54e32d223 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 12:57:52 -0400 Subject: pNFS: Don't zero out the array in nfs4_alloc_pages() We don't need a zeroed out array, since it is immediately being filled. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7d9a51e6b847..6b5b2d36f502 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -965,7 +965,7 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) struct page **pages; int i; - pages = kcalloc(size, sizeof(struct page *), gfp_flags); + pages = kmalloc_array(size, sizeof(struct page *), gfp_flags); if (!pages) { dprintk("%s: can't alloc array of %zu pages\n", __func__, size); return NULL; @@ -975,7 +975,7 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags) pages[i] = alloc_page(gfp_flags); if (!pages[i]) { dprintk("%s: failed to allocate page\n", __func__); - nfs4_free_pages(pages, size); + nfs4_free_pages(pages, i); return NULL; } } -- cgit v1.2.3 From 28ced9a84cd2f9fc68a081fb3b34e70c5d459be3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Sep 2018 13:12:15 -0400 Subject: pNFS: Don't allocate more pages than we need to fit a layoutget response For the 'files' and 'flexfiles' layout types, we do not expect the reply to be any larger than 4k. The block and scsi layout types are a little more greedy, so we keep allocating the maximum response size for now. Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 1 + fs/nfs/flexfilelayout/flexfilelayout.c | 1 + fs/nfs/pnfs.c | 7 +++++++ fs/nfs/pnfs.h | 1 + 4 files changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index d175724ff566..61f46facb39c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -1164,6 +1164,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", .owner = THIS_MODULE, + .max_layoutget_response = 4096, /* 1 page or so... */ .alloc_layout_hdr = filelayout_alloc_layout_hdr, .free_layout_hdr = filelayout_free_layout_hdr, .alloc_lseg = filelayout_alloc_lseg, diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index cae43333ef16..86bcba40ca61 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2356,6 +2356,7 @@ static struct pnfs_layoutdriver_type flexfilelayout_type = { .name = "LAYOUT_FLEX_FILES", .owner = THIS_MODULE, .flags = PNFS_LAYOUTGET_ON_OPEN, + .max_layoutget_response = 4096, /* 1 page or so... */ .set_layoutdriver = ff_layout_set_layoutdriver, .alloc_layout_hdr = ff_layout_alloc_layout_hdr, .free_layout_hdr = ff_layout_free_layout_hdr, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6b5b2d36f502..c5672c02afd6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -991,6 +991,7 @@ pnfs_alloc_init_layoutget_args(struct inode *ino, gfp_t gfp_flags) { struct nfs_server *server = pnfs_find_server(ino, ctx); + size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response; size_t max_pages = max_response_pages(server); struct nfs4_layoutget *lgp; @@ -1000,6 +1001,12 @@ pnfs_alloc_init_layoutget_args(struct inode *ino, if (lgp == NULL) return NULL; + if (max_reply_sz) { + size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (npages < max_pages) + max_pages = npages; + } + lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { kfree(lgp); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index ece367ebde69..e2e9fcd5341d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -125,6 +125,7 @@ struct pnfs_layoutdriver_type { struct module *owner; unsigned flags; unsigned max_deviceinfo_size; + unsigned max_layoutget_response; int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *); int (*clear_layoutdriver) (struct nfs_server *); -- cgit v1.2.3 From 1db97eaa0b482a738c715da6edb023d6f99e50b0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Sep 2018 15:11:57 -0400 Subject: NFS: Convert lookups of the lock context to RCU Speed up lookups of an existing lock context by avoiding the inode->i_lock, and using RCU instead. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 25 ++++++++++++------------- include/linux/nfs_fs.h | 1 + 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b65aee481d13..09b3b7146ff4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -857,15 +857,14 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx) { - struct nfs_lock_context *head = &ctx->lock_context; - struct nfs_lock_context *pos = head; + struct nfs_lock_context *pos; - do { + list_for_each_entry_rcu(pos, &ctx->lock_context.list, list) { if (pos->lockowner != current->files) continue; - refcount_inc(&pos->count); - return pos; - } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head); + if (refcount_inc_not_zero(&pos->count)) + return pos; + } return NULL; } @@ -874,10 +873,10 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) struct nfs_lock_context *res, *new = NULL; struct inode *inode = d_inode(ctx->dentry); - spin_lock(&inode->i_lock); + rcu_read_lock(); res = __nfs_find_lock_context(ctx); + rcu_read_unlock(); if (res == NULL) { - spin_unlock(&inode->i_lock); new = kmalloc(sizeof(*new), GFP_KERNEL); if (new == NULL) return ERR_PTR(-ENOMEM); @@ -885,14 +884,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) spin_lock(&inode->i_lock); res = __nfs_find_lock_context(ctx); if (res == NULL) { - list_add_tail(&new->list, &ctx->lock_context.list); + list_add_tail_rcu(&new->list, &ctx->lock_context.list); new->open_context = ctx; res = new; new = NULL; } + spin_unlock(&inode->i_lock); + kfree(new); } - spin_unlock(&inode->i_lock); - kfree(new); return res; } EXPORT_SYMBOL_GPL(nfs_get_lock_context); @@ -904,9 +903,9 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx) if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock)) return; - list_del(&l_ctx->list); + list_del_rcu(&l_ctx->list); spin_unlock(&inode->i_lock); - kfree(l_ctx); + kfree_rcu(l_ctx, rcu_head); } EXPORT_SYMBOL_GPL(nfs_put_lock_context); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a0831e9d19c9..d2f4f88a0e66 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -62,6 +62,7 @@ struct nfs_lock_context { struct nfs_open_context *open_context; fl_owner_t lockowner; atomic_t io_count; + struct rcu_head rcu_head; }; struct nfs4_state; -- cgit v1.2.3 From 6ba0c4e5bb08cc929662588aa2015e0ee7ee9376 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Sep 2018 15:34:37 -0400 Subject: NFS: Simplify internal check for whether file is open for write Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 09b3b7146ff4..052db41a7f80 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1328,19 +1328,11 @@ static bool nfs_file_has_writers(struct nfs_inode *nfsi) { struct inode *inode = &nfsi->vfs_inode; - assert_spin_locked(&inode->i_lock); - if (!S_ISREG(inode->i_mode)) return false; if (list_empty(&nfsi->open_files)) return false; - /* Note: This relies on nfsi->open_files being ordered with writers - * being placed at the head of the list. - * See nfs_inode_attach_open_context() - */ - return (list_first_entry(&nfsi->open_files, - struct nfs_open_context, - list)->mode & FMODE_WRITE) == FMODE_WRITE; + return inode_is_open_for_write(inode); } static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi) -- cgit v1.2.3 From 0de43976fbe716379084f954b1e370c35aa87bf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Sep 2018 15:57:01 -0400 Subject: NFS: Convert lookups of the open context to RCU Reduce contention on the inode->i_lock by ensuring that we use RCU when looking up the NFS open context. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 11 ++++++----- fs/nfs/inode.c | 35 +++++++++++++++-------------------- fs/nfs/nfs4proc.c | 30 ++++++++++++++++++++++++------ fs/nfs/nfs4state.c | 12 ++++++------ fs/nfs/pnfs.c | 5 ++++- include/linux/nfs_fs.h | 1 + 6 files changed, 56 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index f033f3a69a3b..76d205d1c7bc 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -136,8 +136,8 @@ static int nfs_delegation_claim_opens(struct inode *inode, int err; again: - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { state = ctx->state; if (state == NULL) continue; @@ -147,8 +147,9 @@ again: continue; if (!nfs4_stateid_match(&state->stateid, stateid)) continue; - get_nfs_open_context(ctx); - spin_unlock(&inode->i_lock); + if (!get_nfs_open_context(ctx)) + continue; + rcu_read_unlock(); sp = state->owner; /* Block nfs4_proc_unlck */ mutex_lock(&sp->so_delegreturn_mutex); @@ -164,7 +165,7 @@ again: return err; goto again; } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); return 0; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 052db41a7f80..5b1eee4952b7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -977,9 +977,9 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context); struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) { - if (ctx != NULL) - refcount_inc(&ctx->lock_context.count); - return ctx; + if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count)) + return ctx; + return NULL; } EXPORT_SYMBOL_GPL(get_nfs_open_context); @@ -988,13 +988,13 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) struct inode *inode = d_inode(ctx->dentry); struct super_block *sb = ctx->dentry->d_sb; + if (!refcount_dec_and_test(&ctx->lock_context.count)) + return; if (!list_empty(&ctx->list)) { - if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) - return; - list_del(&ctx->list); + spin_lock(&inode->i_lock); + list_del_rcu(&ctx->list); spin_unlock(&inode->i_lock); - } else if (!refcount_dec_and_test(&ctx->lock_context.count)) - return; + } if (inode != NULL) NFS_PROTO(inode)->close_context(ctx, is_sync); if (ctx->cred != NULL) @@ -1002,7 +1002,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) dput(ctx->dentry); nfs_sb_deactive(sb); kfree(ctx->mdsthreshold); - kfree(ctx); + kfree_rcu(ctx, rcu_head); } void put_nfs_open_context(struct nfs_open_context *ctx) @@ -1026,10 +1026,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx) struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&inode->i_lock); - if (ctx->mode & FMODE_WRITE) - list_add(&ctx->list, &nfsi->open_files); - else - list_add_tail(&ctx->list, &nfsi->open_files); + list_add_tail_rcu(&ctx->list, &nfsi->open_files); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context); @@ -1050,16 +1047,17 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *pos, *ctx = NULL; - spin_lock(&inode->i_lock); - list_for_each_entry(pos, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(pos, &nfsi->open_files, list) { if (cred != NULL && pos->cred != cred) continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; ctx = get_nfs_open_context(pos); - break; + if (ctx) + break; } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ctx; } @@ -1077,9 +1075,6 @@ void nfs_file_clear_open_context(struct file *filp) if (ctx->error < 0) invalidate_inode_pages2(inode->i_mapping); filp->private_data = NULL; - spin_lock(&inode->i_lock); - list_move_tail(&ctx->list, &NFS_I(inode)->open_files); - spin_unlock(&inode->i_lock); put_nfs_open_context_sync(ctx); } } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8220a168282e..10c20a5b075d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1933,23 +1933,41 @@ nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) return ret; } -static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) +static struct nfs_open_context * +nfs4_state_find_open_context_mode(struct nfs4_state *state, fmode_t mode) { struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_open_context *ctx; - spin_lock(&state->inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { if (ctx->state != state) continue; - get_nfs_open_context(ctx); - spin_unlock(&state->inode->i_lock); + if ((ctx->mode & mode) != mode) + continue; + if (!get_nfs_open_context(ctx)) + continue; + rcu_read_unlock(); return ctx; } - spin_unlock(&state->inode->i_lock); + rcu_read_unlock(); return ERR_PTR(-ENOENT); } +static struct nfs_open_context * +nfs4_state_find_open_context(struct nfs4_state *state) +{ + struct nfs_open_context *ctx; + + ctx = nfs4_state_find_open_context_mode(state, FMODE_READ|FMODE_WRITE); + if (!IS_ERR(ctx)) + return ctx; + ctx = nfs4_state_find_open_context_mode(state, FMODE_WRITE); + if (!IS_ERR(ctx)) + return ctx; + return nfs4_state_find_open_context_mode(state, FMODE_READ); +} + static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state, enum open_claim_type4 claim) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 40a08cd483f0..be92ce4259e9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1437,8 +1437,8 @@ void nfs_inode_find_state_and_recover(struct inode *inode, struct nfs4_state *state; bool found = false; - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { state = ctx->state; if (state == NULL) continue; @@ -1456,7 +1456,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode, nfs4_state_mark_reclaim_nograce(clp, state)) found = true; } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); nfs_inode_find_delegation_state_and_recover(inode, stateid); if (found) @@ -1469,13 +1469,13 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; - spin_lock(&inode->i_lock); - list_for_each_entry(ctx, &nfsi->open_files, list) { + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { if (ctx->state != state) continue; set_bit(NFS_CONTEXT_BAD, &ctx->flags); } - spin_unlock(&inode->i_lock); + rcu_read_unlock(); } static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c5672c02afd6..06cb90e9bc6e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1339,6 +1339,7 @@ bool pnfs_roc(struct inode *ino, if (!nfs_have_layout(ino)) return false; retry: + rcu_read_lock(); spin_lock(&ino->i_lock); lo = nfsi->layout; if (!lo || !pnfs_layout_is_valid(lo) || @@ -1349,6 +1350,7 @@ retry: pnfs_get_layout_hdr(lo); if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { spin_unlock(&ino->i_lock); + rcu_read_unlock(); wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, TASK_UNINTERRUPTIBLE); pnfs_put_layout_hdr(lo); @@ -1362,7 +1364,7 @@ retry: skip_read = true; } - list_for_each_entry(ctx, &nfsi->open_files, list) { + list_for_each_entry_rcu(ctx, &nfsi->open_files, list) { state = ctx->state; if (state == NULL) continue; @@ -1410,6 +1412,7 @@ retry: out_noroc: spin_unlock(&ino->i_lock); + rcu_read_unlock(); pnfs_layoutcommit_inode(ino, true); if (roc) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index d2f4f88a0e66..6e0417c02279 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -83,6 +83,7 @@ struct nfs_open_context { struct list_head list; struct nfs4_threshold *mdsthreshold; + struct rcu_head rcu_head; }; struct nfs_open_dir_context { -- cgit v1.2.3 From 9ae075fdd1901b60d87a0404f9c110753e16969a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Sep 2018 19:15:15 -0400 Subject: NFSv4: Convert open state lookup to use RCU Further reduce contention on the inode->i_lock. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4state.c | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3a6904173214..1b4737f4cac4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -191,6 +191,7 @@ struct nfs4_state { atomic_t count; wait_queue_head_t waitq; + struct rcu_head rcu_head; }; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index be92ce4259e9..7feac365038c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -684,7 +684,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; - list_for_each_entry(state, &nfsi->open_states, inode_states) { + list_for_each_entry_rcu(state, &nfsi->open_states, inode_states) { if (state->owner != owner) continue; if (!nfs4_valid_open_stateid(state)) @@ -698,7 +698,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) static void nfs4_free_open_state(struct nfs4_state *state) { - kfree(state); + kfree_rcu(state, rcu_head); } struct nfs4_state * @@ -707,9 +707,9 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) struct nfs4_state *state, *new; struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&inode->i_lock); + rcu_read_lock(); state = __nfs4_find_state_byowner(inode, owner); - spin_unlock(&inode->i_lock); + rcu_read_unlock(); if (state) goto out; new = nfs4_alloc_open_state(); @@ -720,7 +720,7 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner) state = new; state->owner = owner; atomic_inc(&owner->so_count); - list_add(&state->inode_states, &nfsi->open_states); + list_add_rcu(&state->inode_states, &nfsi->open_states); ihold(inode); state->inode = inode; spin_unlock(&inode->i_lock); @@ -746,7 +746,7 @@ void nfs4_put_open_state(struct nfs4_state *state) if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) return; spin_lock(&inode->i_lock); - list_del(&state->inode_states); + list_del_rcu(&state->inode_states); list_del(&state->open_states); spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); -- cgit v1.2.3 From ace9fad43aa60a88af4b57a8328f0958e3d07bf0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Sep 2018 19:19:07 -0400 Subject: NFSv4: Convert struct nfs4_state to use refcount_t Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/nfs4state.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1b4737f4cac4..8d59c9655ec4 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -188,7 +188,7 @@ struct nfs4_state { unsigned int n_wronly; /* Number of write-only references */ unsigned int n_rdwr; /* Number of read/write references */ fmode_t state; /* State on the server (R,W, or RW) */ - atomic_t count; + refcount_t count; wait_queue_head_t waitq; struct rcu_head rcu_head; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 10c20a5b075d..4da59bd53f98 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1777,7 +1777,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) out: return ERR_PTR(ret); out_return_state: - atomic_inc(&state->count); + refcount_inc(&state->count); return state; } @@ -1849,7 +1849,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) update: update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode); - atomic_inc(&state->count); + refcount_inc(&state->count); return state; } @@ -1887,7 +1887,7 @@ nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data) return ERR_CAST(inode); if (data->state != NULL && data->state->inode == inode) { state = data->state; - atomic_inc(&state->count); + refcount_inc(&state->count); } else state = nfs4_get_open_state(inode, data->owner); iput(inode); @@ -1978,7 +1978,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; - atomic_inc(&state->count); + refcount_inc(&state->count); return opendata; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7feac365038c..6ca3bd076073 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -655,7 +655,7 @@ nfs4_alloc_open_state(void) state = kzalloc(sizeof(*state), GFP_NOFS); if (!state) return NULL; - atomic_set(&state->count, 1); + refcount_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); seqlock_init(&state->seqlock); @@ -689,7 +689,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) continue; if (!nfs4_valid_open_stateid(state)) continue; - if (atomic_inc_not_zero(&state->count)) + if (refcount_inc_not_zero(&state->count)) return state; } return NULL; @@ -743,7 +743,7 @@ void nfs4_put_open_state(struct nfs4_state *state) struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) + if (!refcount_dec_and_lock(&state->count, &owner->so_lock)) return; spin_lock(&inode->i_lock); list_del_rcu(&state->inode_states); @@ -1573,7 +1573,7 @@ restart: continue; if (state->state == 0) continue; - atomic_inc(&state->count); + refcount_inc(&state->count); spin_unlock(&sp->so_lock); status = ops->recover_open(sp, state); if (status >= 0) { -- cgit v1.2.3 From 943cff67b842839f4f35364ba2db5c2d3f025d94 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 18 Sep 2018 10:07:44 -0400 Subject: NFSv4.1: Fix the r/wsize checking The intention of nfs4_session_set_rwsize() was to cap the r/wsize to the buffer sizes negotiated by the CREATE_SESSION. The initial code had a bug whereby we would not check the values negotiated by nfs_probe_fsinfo() (the assumption being that CREATE_SESSION will always negotiate buffer values that are sane w.r.t. the server's preferred r/wsizes) but would only check values set by the user in the 'mount' command. The code was changed in 4.11 to _always_ set the r/wsize, meaning that we now never use the server preferred r/wsizes. This is the regression that this patch fixes. Also rename the function to nfs4_session_limit_rwsize() in order to avoid future confusion. Fixes: 033853325fe3 (NFSv4.1 respect server's max size in CREATE_SESSION") Cc: stable@vger.kernel.org # v4.11+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 146e30862234..8f53455c4765 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -950,10 +950,10 @@ EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. - * Set the mount rsize and wsize with negotiated fore channel - * attributes which will be bound checked in nfs_server_set_fsinfo. + * Limit the mount rsize, wsize and dtsize using negotiated fore + * channel attributes. */ -static void nfs4_session_set_rwsize(struct nfs_server *server) +static void nfs4_session_limit_rwsize(struct nfs_server *server) { #ifdef CONFIG_NFS_V4_1 struct nfs4_session *sess; @@ -966,9 +966,11 @@ static void nfs4_session_set_rwsize(struct nfs_server *server) server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; - if (!server->rsize || server->rsize > server_resp_sz) + if (server->dtsize > server_resp_sz) + server->dtsize = server_resp_sz; + if (server->rsize > server_resp_sz) server->rsize = server_resp_sz; - if (!server->wsize || server->wsize > server_rqst_sz) + if (server->wsize > server_rqst_sz) server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } @@ -1015,12 +1017,12 @@ static int nfs4_server_common_setup(struct nfs_server *server, (unsigned long long) server->fsid.minor); nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); - nfs4_session_set_rwsize(server); - error = nfs_probe_fsinfo(server, mntfh, fattr); if (error < 0) goto out; + nfs4_session_limit_rwsize(server); + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; -- cgit v1.2.3 From 10ec57e4c500007fb54b7030f1194121794d48eb Mon Sep 17 00:00:00 2001 From: Tigran Mkrtchyan Date: Mon, 20 Aug 2018 08:56:08 +0200 Subject: nfs4: flex_file: ignore synthetic uid/gid for tightly coupled DSes for tightly coupled DSes client must ignore provided synthetic uid and gid as stated in draft-ietf-nfsv4-flex-files-19#section-5.1. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 59aa04976331..74d8d5352438 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -453,7 +453,7 @@ ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); struct rpc_cred *cred; - if (mirror) { + if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) { cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode); if (!cred) cred = get_rpccred(mdscred); -- cgit v1.2.3 From cb7a8384dc0202d5490302b7e948ede498aad720 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 11 Sep 2018 16:23:46 -0400 Subject: NFS: Split out the body of nfs4_reclaim_open_state() Moving all of this into a new function removes the need for cramped indentation, making the code overall easier to look at. I also take this chance to switch copy recovery over to using nfs4_stateid_match_other() Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 83 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 6ca3bd076073..8c7fe222f4c4 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1549,10 +1549,51 @@ out: return status; } +static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state, + const struct nfs4_state_recovery_ops *ops) +{ + struct nfs4_lock_state *lock; + int status; + + status = ops->recover_open(sp, state); + if (status < 0) + return status; + + status = nfs4_reclaim_locks(state, ops); + if (status < 0) + return status; + + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) { + spin_lock(&state->state_lock); + list_for_each_entry(lock, &state->lock_states, ls_locks) { + if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) + pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__); + } + spin_unlock(&state->state_lock); + } + +#ifdef CONFIG_NFS_V4_2 + if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) { + struct nfs4_copy_state *copy; + spin_lock(&sp->so_server->nfs_client->cl_lock); + list_for_each_entry(copy, &sp->so_server->ss_copies, copies) { + if (nfs4_stateid_match_other(&state->stateid, ©->parent_state->stateid)) + continue; + copy->flags = 1; + complete(©->completion); + break; + } + spin_unlock(&sp->so_server->nfs_client->cl_lock); + } +#endif /* CONFIG_NFS_V4_2 */ + + clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); + return status; +} + static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops) { struct nfs4_state *state; - struct nfs4_lock_state *lock; int status = 0; /* Note: we rely on the sp->so_states list being ordered @@ -1575,43 +1616,13 @@ restart: continue; refcount_inc(&state->count); spin_unlock(&sp->so_lock); - status = ops->recover_open(sp, state); + status = __nfs4_reclaim_open_state(sp, state, ops); if (status >= 0) { - status = nfs4_reclaim_locks(state, ops); - if (status >= 0) { - if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) { - spin_lock(&state->state_lock); - list_for_each_entry(lock, &state->lock_states, ls_locks) { - if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) - pr_warn_ratelimited("NFS: " - "%s: Lock reclaim " - "failed!\n", __func__); - } - spin_unlock(&state->state_lock); - } - clear_bit(NFS_STATE_RECLAIM_NOGRACE, - &state->flags); -#ifdef CONFIG_NFS_V4_2 - if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) { - struct nfs4_copy_state *copy; - - spin_lock(&sp->so_server->nfs_client->cl_lock); - list_for_each_entry(copy, &sp->so_server->ss_copies, copies) { - if (memcmp(&state->stateid.other, ©->parent_state->stateid.other, NFS4_STATEID_SIZE)) - continue; - copy->flags = 1; - complete(©->completion); - printk("AGLO: server rebooted waking up the copy\n"); - break; - } - spin_unlock(&sp->so_server->nfs_client->cl_lock); - } -#endif /* CONFIG_NFS_V4_2 */ - nfs4_put_open_state(state); - spin_lock(&sp->so_lock); - goto restart; - } + nfs4_put_open_state(state); + spin_lock(&sp->so_lock); + goto restart; } + switch (status) { default: printk(KERN_ERR "NFS: %s: unhandled error %d\n", -- cgit v1.2.3 From 35a61606a61249fb59cbf566f869c026f5fafec4 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 11 Sep 2018 16:23:47 -0400 Subject: NFS: Reduce indentation of the switch statement in nfs4_reclaim_open_state() Most places in the kernel tend to line up cases with the switch to reduce indentation, so move this over to match that style. Additionally, I handle the (status >= 0) case in the switch so that we only "goto restart" from a single place after error handling. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 70 +++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8c7fe222f4c4..cc27f11de2ec 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1617,46 +1617,42 @@ restart: refcount_inc(&state->count); spin_unlock(&sp->so_lock); status = __nfs4_reclaim_open_state(sp, state, ops); - if (status >= 0) { - nfs4_put_open_state(state); - spin_lock(&sp->so_lock); - goto restart; - } switch (status) { - default: - printk(KERN_ERR "NFS: %s: unhandled error %d\n", - __func__, status); - /* Fall through */ - case -ENOENT: - case -ENOMEM: - case -EACCES: - case -EROFS: - case -EIO: - case -ESTALE: - /* Open state on this file cannot be recovered */ - nfs4_state_mark_recovery_failed(state, status); - break; - case -EAGAIN: - ssleep(1); - /* Fall through */ - case -NFS4ERR_ADMIN_REVOKED: - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_OLD_STATEID: - case -NFS4ERR_BAD_STATEID: - case -NFS4ERR_RECLAIM_BAD: - case -NFS4ERR_RECLAIM_CONFLICT: - nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state); + default: + if (status >= 0) break; - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: - nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state); - case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - goto out_err; + printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status); + /* Fall through */ + case -ENOENT: + case -ENOMEM: + case -EACCES: + case -EROFS: + case -EIO: + case -ESTALE: + /* Open state on this file cannot be recovered */ + nfs4_state_mark_recovery_failed(state, status); + break; + case -EAGAIN: + ssleep(1); + /* Fall through */ + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state); + break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state); + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + goto out_err; } nfs4_put_open_state(state); spin_lock(&sp->so_lock); -- cgit v1.2.3 From 000d3f9566edfa48d592198257283a4465f87ffd Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Tue, 11 Sep 2018 16:23:48 -0400 Subject: NFS: Reduce indentation of nfs4_recovery_handle_error() This is to match kernel coding style for switch statements. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 64 +++++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cc27f11de2ec..91fd4049cafc 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1802,38 +1802,38 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) { switch (error) { - case 0: - break; - case -NFS4ERR_CB_PATH_DOWN: - nfs40_handle_cb_pathdown(clp); - break; - case -NFS4ERR_NO_GRACE: - nfs4_state_end_reclaim_reboot(clp); - break; - case -NFS4ERR_STALE_CLIENTID: - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_start_reclaim_reboot(clp); - break; - case -NFS4ERR_EXPIRED: - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - nfs4_state_start_reclaim_nograce(clp); - break; - case -NFS4ERR_BADSESSION: - case -NFS4ERR_BADSLOT: - case -NFS4ERR_BAD_HIGH_SLOT: - case -NFS4ERR_DEADSESSION: - case -NFS4ERR_SEQ_FALSE_RETRY: - case -NFS4ERR_SEQ_MISORDERED: - set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); - /* Zero session reset errors */ - break; - case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: - set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); - break; - default: - dprintk("%s: failed to handle error %d for server %s\n", - __func__, error, clp->cl_hostname); - return error; + case 0: + break; + case -NFS4ERR_CB_PATH_DOWN: + nfs40_handle_cb_pathdown(clp); + break; + case -NFS4ERR_NO_GRACE: + nfs4_state_end_reclaim_reboot(clp); + break; + case -NFS4ERR_STALE_CLIENTID: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_reboot(clp); + break; + case -NFS4ERR_EXPIRED: + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + /* Zero session reset errors */ + break; + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state); + break; + default: + dprintk("%s: failed to handle error %d for server %s\n", + __func__, error, clp->cl_hostname); + return error; } dprintk("%s: handled error %d for server %s\n", __func__, error, clp->cl_hostname); -- cgit v1.2.3 From 80f42368868e082c5c1dcca6ada94f6b8eab4991 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 20 Sep 2018 16:12:13 -0400 Subject: NFSv4: Split out NFS v4.2 copy completion functions The convention in the rest of the code is to have a separate function for anything that might be ifdef-ed out. Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 91fd4049cafc..62ae0fd345ad 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1549,6 +1549,31 @@ out: return status; } +#ifdef CONFIG_NFS_V4_2 +static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs4_copy_state *copy; + + if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) + return; + + spin_lock(&sp->so_server->nfs_client->cl_lock); + list_for_each_entry(copy, &sp->so_server->ss_copies, copies) { + if (nfs4_stateid_match_other(&state->stateid, ©->parent_state->stateid)) + continue; + copy->flags = 1; + complete(©->completion); + break; + } + spin_unlock(&sp->so_server->nfs_client->cl_lock); +} +#else /* !CONFIG_NFS_V4_2 */ +static inline void nfs42_complete_copies(struct nfs4_state_owner *sp, + struct nfs4_state *state) +{ +} +#endif /* CONFIG_NFS_V4_2 */ + static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) { @@ -1572,21 +1597,7 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st spin_unlock(&state->state_lock); } -#ifdef CONFIG_NFS_V4_2 - if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) { - struct nfs4_copy_state *copy; - spin_lock(&sp->so_server->nfs_client->cl_lock); - list_for_each_entry(copy, &sp->so_server->ss_copies, copies) { - if (nfs4_stateid_match_other(&state->stateid, ©->parent_state->stateid)) - continue; - copy->flags = 1; - complete(©->completion); - break; - } - spin_unlock(&sp->so_server->nfs_client->cl_lock); - } -#endif /* CONFIG_NFS_V4_2 */ - + nfs42_complete_copies(sp, state); clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); return status; } -- cgit v1.2.3 From 8d8928d87960d71f898767185b8c0e4ce3de3cbe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Mar 2018 12:03:00 -0500 Subject: NFSv3: Improve NFSv3 performance when server returns no post-op attributes When the server fails to return post-op attributes, the client's attempt to place read data directly in the page cache fails, and so we have to do an extra copy in order to realign the data with page borders. This patch attempts to detect servers that don't return post-op attributes on read (e.g. for pNFS) and adjusts the placement calculation accordingly. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 5 +++++ fs/nfs/nfs3xdr.c | 6 +++++- include/linux/nfs_fs_sb.h | 3 +++ include/linux/nfs_xdr.h | 3 ++- 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ec8a9efa268f..71bc16225b98 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -786,6 +786,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { struct inode *inode = hdr->inode; + struct nfs_server *server = NFS_SERVER(inode); if (hdr->pgio_done_cb != NULL) return hdr->pgio_done_cb(task, hdr); @@ -793,6 +794,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) if (nfs3_async_handle_jukebox(task, inode)) return -EAGAIN; + if (task->tk_status >= 0 && !server->read_hdrsize) + cmpxchg(&server->read_hdrsize, 0, hdr->res.replen); + nfs_invalidate_atime(inode); nfs_refresh_inode(inode, &hdr->fattr); return 0; @@ -802,6 +806,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr, struct rpc_message *msg) { msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ]; + hdr->args.replen = NFS_SERVER(hdr->inode)->read_hdrsize; } static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index d8c4c10b15f7..78df4eb60f85 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -983,10 +983,11 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, const void *data) { const struct nfs_pgio_args *args = data; + unsigned int replen = args->replen ? args->replen : NFS3_readres_sz; encode_read3args(xdr, args); prepare_reply_buffer(req, args->pages, args->pgbase, - args->count, NFS3_readres_sz); + args->count, replen); req->rq_rcv_buf.flags |= XDRBUF_READ; } @@ -1675,9 +1676,11 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, void *data) { struct nfs_pgio_res *result = data; + unsigned int pos; enum nfs_stat status; int error; + pos = xdr_stream_pos(xdr); error = decode_nfsstat3(xdr, &status); if (unlikely(error)) goto out; @@ -1687,6 +1690,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, result->op_status = status; if (status != NFS3_OK) goto out_status; + result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2); error = decode_read3resok(xdr, result); out: return error; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index bf39d9c92201..0fc0b9135d46 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -228,6 +228,9 @@ struct nfs_server { unsigned short mountd_port; unsigned short mountd_protocol; struct rpc_wait_queue uoc_rpcwaitq; + + /* XDR related information */ + unsigned int read_hdrsize; }; /* Server capabilities */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index bd1c889a9ed9..7f5535e5e852 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -608,6 +608,7 @@ struct nfs_pgio_args { __u32 count; unsigned int pgbase; struct page ** pages; + unsigned int replen; /* used by read */ const u32 * bitmask; /* used by write */ enum nfs3_stable_how stable; /* used by write */ }; @@ -618,9 +619,9 @@ struct nfs_pgio_res { __u32 count; __u32 op_status; int eof; /* used by read */ + unsigned int replen; /* used by read */ struct nfs_writeverf * verf; /* used by write */ const struct nfs_server *server; /* used by write */ - }; /* -- cgit v1.2.3 From 1c6c4b740df12f2162ae5c3fac337137e2776236 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 25 Sep 2018 12:34:43 -0400 Subject: NFS: Remove private spinlock in struct nfs_pgio_header Now that each struct nfs_pgio_header corresponds to one RPC call, we only have one writer to the struct nfs_pgio_header. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 13 ++++++------- fs/nfs/read.c | 10 ++++------ include/linux/nfs_xdr.h | 5 ++--- 3 files changed, 12 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index bb5476a6d264..f97c455f5734 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -63,14 +63,14 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init); void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos) { - spin_lock(&hdr->lock); - if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags) - || pos < hdr->io_start + hdr->good_bytes) { + unsigned int new = pos - hdr->io_start; + + if (hdr->good_bytes > new) { + hdr->good_bytes = new; clear_bit(NFS_IOHDR_EOF, &hdr->flags); - hdr->good_bytes = pos - hdr->io_start; - hdr->error = error; + if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags)) + hdr->error = error; } - spin_unlock(&hdr->lock); } static inline struct nfs_page * @@ -494,7 +494,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops) if (hdr) { INIT_LIST_HEAD(&hdr->pages); - spin_lock_init(&hdr->lock); hdr->rw_ops = ops; } return hdr; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 48d7277c60a9..f9f19784db82 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -276,16 +276,14 @@ static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_header *hdr) { if (hdr->res.eof) { - loff_t bound; + loff_t pos = hdr->args.offset + hdr->res.count; + unsigned int new = pos - hdr->io_start; - bound = hdr->args.offset + hdr->res.count; - spin_lock(&hdr->lock); - if (bound < hdr->io_start + hdr->good_bytes) { + if (hdr->good_bytes > new) { + hdr->good_bytes = new; set_bit(NFS_IOHDR_EOF, &hdr->flags); clear_bit(NFS_IOHDR_ERROR, &hdr->flags); - hdr->good_bytes = bound - hdr->io_start; } - spin_unlock(&hdr->lock); } else if (hdr->res.count < hdr->args.count) nfs_readpage_retry(task, hdr); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 343e44166346..0e016252cfc6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1482,11 +1482,10 @@ struct nfs_pgio_header { const struct nfs_rw_ops *rw_ops; struct nfs_io_completion *io_completion; struct nfs_direct_req *dreq; - spinlock_t lock; - /* fields protected by lock */ + int pnfs_error; int error; /* merge with pnfs_error */ - unsigned long good_bytes; /* boundary of good data */ + unsigned int good_bytes; /* boundary of good data */ unsigned long flags; /* -- cgit v1.2.3 From be189f7e7f03de35887e5a85ddcf39b91b5d7fc1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 27 Sep 2018 17:12:33 -0400 Subject: NFS: Fix dentry revalidation on NFSv4 lookup We need to ensure that inode and dentry revalidation occurs correctly on reopen of a file that is already open. Currently, we can end up not revalidating either in the case of NFSv4.0, due to the 'cached open' path. Let's fix that by ensuring that we only do cached open for the special cases of open recovery and delegation return. Reported-by: Stan Hu Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4da59bd53f98..db84b4adbc49 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1349,12 +1349,20 @@ static bool nfs4_mode_match_open_stateid(struct nfs4_state *state, return false; } -static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode) +static int can_open_cached(struct nfs4_state *state, fmode_t mode, + int open_mode, enum open_claim_type4 claim) { int ret = 0; if (open_mode & (O_EXCL|O_TRUNC)) goto out; + switch (claim) { + case NFS4_OPEN_CLAIM_NULL: + case NFS4_OPEN_CLAIM_FH: + goto out; + default: + break; + } switch (mode & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 @@ -1747,7 +1755,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) for (;;) { spin_lock(&state->owner->so_lock); - if (can_open_cached(state, fmode, open_mode)) { + if (can_open_cached(state, fmode, open_mode, claim)) { update_open_stateflags(state, fmode); spin_unlock(&state->owner->so_lock); goto out_return_state; @@ -2294,7 +2302,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) if (data->state != NULL) { struct nfs_delegation *delegation; - if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags)) + if (can_open_cached(data->state, data->o_arg.fmode, + data->o_arg.open_flags, claim)) goto out_no_action; rcu_read_lock(); delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); -- cgit v1.2.3 From 5ceb9d7fdaaf6d8ced6cd7861cf1deb9cd93fa47 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Sep 2018 09:04:05 -0400 Subject: NFS: Refactor nfs_lookup_revalidate() Refactor the code in nfs_lookup_revalidate() as a stepping stone towards optimising and fixing nfs4_lookup_revalidate(). Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 222 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 126 insertions(+), 96 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8bfaa658b2c1..2e6a253e4104 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1072,6 +1072,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU); } +static int +nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, + struct inode *inode, int error) +{ + switch (error) { + case 1: + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", + __func__, dentry); + return 1; + case 0: + nfs_mark_for_revalidate(dir); + if (inode && S_ISDIR(inode->i_mode)) { + /* Purge readdir caches. */ + nfs_zap_caches(inode); + /* + * We can't d_drop the root of a disconnected tree: + * its d_hash is on the s_anon list and d_drop() would hide + * it from shrink_dcache_for_unmount(), leading to busy + * inodes on unmount and further oopses. + */ + if (IS_ROOT(dentry)) + return 1; + } + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", + __func__, dentry); + return 0; + } + dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", + __func__, dentry, error); + return error; +} + +static int +nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + int ret = 1; + if (nfs_neg_need_reval(dir, dentry, flags)) { + if (flags & LOOKUP_RCU) + return -ECHILD; + ret = 0; + } + return nfs_lookup_revalidate_done(dir, dentry, NULL, ret); +} + +static int +nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); +} + +static int +nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + struct nfs_fh *fhandle; + struct nfs_fattr *fattr; + struct nfs4_label *label; + int ret; + + ret = -ENOMEM; + fhandle = nfs_alloc_fhandle(); + fattr = nfs_alloc_fattr(); + label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL); + if (fhandle == NULL || fattr == NULL || IS_ERR(label)) + goto out; + + ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + if (ret < 0) { + if (ret == -ESTALE || ret == -ENOENT) + ret = 0; + goto out; + } + ret = 0; + if (nfs_compare_fh(NFS_FH(inode), fhandle)) + goto out; + if (nfs_refresh_inode(inode, fattr) < 0) + goto out; + + nfs_setsecurity(inode, fattr, label); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + + /* set a readdirplus hint that we had a cache miss */ + nfs_force_use_readdirplus(dir); + ret = 1; +out: + nfs_free_fattr(fattr); + nfs_free_fhandle(fhandle); + nfs4_label_free(label); + return nfs_lookup_revalidate_done(dir, dentry, inode, ret); +} + /* * This is called every time the dcache has a lookup hit, * and we should check whether we can really trust that @@ -1083,58 +1177,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. */ -static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int +nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, + unsigned int flags) { - struct inode *dir; struct inode *inode; - struct dentry *parent; - struct nfs_fh *fhandle = NULL; - struct nfs_fattr *fattr = NULL; - struct nfs4_label *label = NULL; int error; - if (flags & LOOKUP_RCU) { - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - } else { - parent = dget_parent(dentry); - dir = d_inode(parent); - } nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = d_inode(dentry); - if (!inode) { - if (nfs_neg_need_reval(dir, dentry, flags)) { - if (flags & LOOKUP_RCU) - return -ECHILD; - goto out_bad; - } - goto out_valid; - } + if (!inode) + return nfs_lookup_revalidate_negative(dir, dentry, flags); if (is_bad_inode(inode)) { - if (flags & LOOKUP_RCU) - return -ECHILD; dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n", __func__, dentry); goto out_bad; } if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) - goto out_set_verifier; + return nfs_lookup_revalidate_delegated(dir, dentry, inode); /* Force a full look up iff the parent directory has changed */ if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) && nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) { error = nfs_lookup_verify_inode(inode, flags); if (error) { - if (flags & LOOKUP_RCU) - return -ECHILD; if (error == -ESTALE) - goto out_zap_parent; - goto out_error; + nfs_zap_caches(dir); + goto out_bad; } nfs_advise_use_readdirplus(dir); goto out_valid; @@ -1146,81 +1218,39 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) if (NFS_STALE(inode)) goto out_bad; - error = -ENOMEM; - fhandle = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); - if (fhandle == NULL || fattr == NULL) - goto out_error; - - label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); - if (IS_ERR(label)) - goto out_error; - trace_nfs_lookup_revalidate_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + error = nfs_lookup_revalidate_dentry(dir, dentry, inode); trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error); - if (error == -ESTALE || error == -ENOENT) - goto out_bad; - if (error) - goto out_error; - if (nfs_compare_fh(NFS_FH(inode), fhandle)) - goto out_bad; - if ((error = nfs_refresh_inode(inode, fattr)) != 0) - goto out_bad; - - nfs_setsecurity(inode, fattr, label); - - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); - nfs4_label_free(label); + return error; +out_valid: + return nfs_lookup_revalidate_done(dir, dentry, inode, 1); +out_bad: + if (flags & LOOKUP_RCU) + return -ECHILD; + return nfs_lookup_revalidate_done(dir, dentry, inode, 0); +} - /* set a readdirplus hint that we had a cache miss */ - nfs_force_use_readdirplus(dir); +static int +nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct dentry *parent; + struct inode *dir; + int ret; -out_set_verifier: - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - out_valid: if (flags & LOOKUP_RCU) { + parent = READ_ONCE(dentry->d_parent); + dir = d_inode_rcu(parent); + if (!dir) + return -ECHILD; + ret = nfs_do_lookup_revalidate(dir, dentry, flags); if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; - } else + } else { + parent = dget_parent(dentry); + ret = nfs_do_lookup_revalidate(d_inode(parent), dentry, flags); dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n", - __func__, dentry); - return 1; -out_zap_parent: - nfs_zap_caches(dir); - out_bad: - WARN_ON(flags & LOOKUP_RCU); - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); - nfs4_label_free(label); - nfs_mark_for_revalidate(dir); - if (inode && S_ISDIR(inode->i_mode)) { - /* Purge readdir caches. */ - nfs_zap_caches(inode); - /* - * We can't d_drop the root of a disconnected tree: - * its d_hash is on the s_anon list and d_drop() would hide - * it from shrink_dcache_for_unmount(), leading to busy - * inodes on unmount and further oopses. - */ - if (IS_ROOT(dentry)) - goto out_valid; } - dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", - __func__, dentry); - return 0; -out_error: - WARN_ON(flags & LOOKUP_RCU); - nfs_free_fattr(fattr); - nfs_free_fhandle(fhandle); - nfs4_label_free(label); - dput(parent); - dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n", - __func__, dentry, error); - return error; + return ret; } /* -- cgit v1.2.3 From c7944ebb9ce9461079659e9e6ec5baaf73724b3b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 28 Sep 2018 12:42:51 -0400 Subject: NFSv4: Fix lookup revalidate of regular files If we're revalidating an existing dentry in order to open a file, we need to ensure that we check the directory has not changed before we optimise away the lookup. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 79 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 39 insertions(+), 40 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 2e6a253e4104..71b2e390becf 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1231,7 +1231,8 @@ out_bad: } static int -nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, + int (*reval)(struct inode *, struct dentry *, unsigned int)) { struct dentry *parent; struct inode *dir; @@ -1242,17 +1243,22 @@ nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) dir = d_inode_rcu(parent); if (!dir) return -ECHILD; - ret = nfs_do_lookup_revalidate(dir, dentry, flags); + ret = reval(dir, dentry, flags); if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; } else { parent = dget_parent(dentry); - ret = nfs_do_lookup_revalidate(d_inode(parent), dentry, flags); + ret = reval(d_inode(parent), dentry, flags); dput(parent); } return ret; } +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) +{ + return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); +} + /* * A weaker form of d_revalidate for revalidating just the d_inode(dentry) * when we don't really care about the dentry name. This is called when a @@ -1609,62 +1615,55 @@ no_open: } EXPORT_SYMBOL_GPL(nfs_atomic_open); -static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) +static int +nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, + unsigned int flags) { struct inode *inode; - int ret = 0; if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) - goto no_open; + goto full_reval; if (d_mountpoint(dentry)) - goto no_open; - if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1) - goto no_open; + goto full_reval; inode = d_inode(dentry); /* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ - if (inode == NULL) { - struct dentry *parent; - struct inode *dir; - - if (flags & LOOKUP_RCU) { - parent = READ_ONCE(dentry->d_parent); - dir = d_inode_rcu(parent); - if (!dir) - return -ECHILD; - } else { - parent = dget_parent(dentry); - dir = d_inode(parent); - } - if (!nfs_neg_need_reval(dir, dentry, flags)) - ret = 1; - else if (flags & LOOKUP_RCU) - ret = -ECHILD; - if (!(flags & LOOKUP_RCU)) - dput(parent); - else if (parent != READ_ONCE(dentry->d_parent)) - return -ECHILD; - goto out; - } + if (inode == NULL) + goto full_reval; + + if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ)) + return nfs_lookup_revalidate_delegated(dir, dentry, inode); /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) - goto no_open; + goto full_reval; + /* We cannot do exclusive creation on a positive dentry */ - if (flags & LOOKUP_EXCL) - goto no_open; + if (flags & (LOOKUP_EXCL | LOOKUP_REVAL)) + goto reval_dentry; + + /* Check if the directory changed */ + if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) + goto reval_dentry; /* Let f_op->open() actually open (and revalidate) the file */ - ret = 1; + return 1; +reval_dentry: + if (flags & LOOKUP_RCU) + return -ECHILD; + return nfs_lookup_revalidate_dentry(dir, dentry, inode);; -out: - return ret; +full_reval: + return nfs_do_lookup_revalidate(dir, dentry, flags); +} -no_open: - return nfs_lookup_revalidate(dentry, flags); +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) +{ + return __nfs_lookup_revalidate(dentry, flags, + nfs4_do_lookup_revalidate); } #endif /* CONFIG_NFSV4 */ -- cgit v1.2.3 From 44f411c353bf6d98d5a34f8f1b8605d43b2e50b8 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 4 Oct 2018 14:45:00 -0400 Subject: NFSv4.x: fix lock recovery during delegation recall Running "./nfstest_delegation --runtest recall26" uncovers that client doesn't recover the lock when we have an appending open, where the initial open got a write delegation. Instead of checking for the passed in open context against the file lock's open context. Check that the state is the same. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 76d205d1c7bc..07b839560576 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -93,7 +93,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags) return nfs4_do_check_delegation(inode, flags, false); } -static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid) { struct inode *inode = state->inode; struct file_lock *fl; @@ -108,7 +108,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ spin_lock(&flctx->flc_lock); restart: list_for_each_entry(fl, list, fl_list) { - if (nfs_file_open_context(fl->fl_file) != ctx) + if (nfs_file_open_context(fl->fl_file)->state != state) continue; spin_unlock(&flctx->flc_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); @@ -156,7 +156,7 @@ again: seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid, type); if (!err) - err = nfs_delegation_claim_locks(ctx, state, stateid); + err = nfs_delegation_claim_locks(state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) err = -EAGAIN; mutex_unlock(&sp->so_delegreturn_mutex); -- cgit v1.2.3 From fdbd1a2e4a71adcb1ae219fcfd964930d77a7f84 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 18 Oct 2018 15:01:48 -0400 Subject: nfs: Fix a missed page unlock after pg_doio() We must check pg_error and call error_cleanup after any call to pg_doio. Currently, we are skipping the unlock of a page if we encounter an error in nfs_pageio_complete() before handing off the work to the RPC layer. Signed-off-by: Benjamin Coddington Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f97c455f5734..3b50edf6f883 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1110,6 +1110,20 @@ static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, return ret; } +static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc) +{ + u32 midx; + struct nfs_pgio_mirror *mirror; + + if (!desc->pg_error) + return; + + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + } +} + int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { @@ -1160,25 +1174,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, return 1; out_failed: - /* - * We might have failed before sending any reqs over wire. - * Clean up rest of the reqs in mirror pg_list. - */ - if (desc->pg_error) { - struct nfs_pgio_mirror *mirror; - void (*func)(struct list_head *); - - /* remember fatal errors */ - if (nfs_error_is_fatal(desc->pg_error)) - nfs_context_set_write_error(req->wb_context, - desc->pg_error); - - func = desc->pg_completion_ops->error_cleanup; - for (midx = 0; midx < desc->pg_mirror_count; midx++) { - mirror = &desc->pg_mirrors[midx]; - func(&mirror->pg_list); - } - } + /* remember fatal errors */ + if (nfs_error_is_fatal(desc->pg_error)) + nfs_context_set_write_error(req->wb_context, + desc->pg_error); + nfs_pageio_error_cleanup(desc); return 0; } @@ -1250,6 +1250,8 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) for (midx = 0; midx < desc->pg_mirror_count; midx++) nfs_pageio_complete_mirror(desc, midx); + if (desc->pg_error < 0) + nfs_pageio_error_cleanup(desc); if (desc->pg_ops->pg_cleanup) desc->pg_ops->pg_cleanup(desc); nfs_pageio_cleanup_mirroring(desc); -- cgit v1.2.3 From fc187514d8af3a676c7bd7922439f9f5e5c6223f Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 18 Oct 2018 15:01:49 -0400 Subject: nfs: remove redundant call to nfs_context_set_write_error() We don't need to call this in the direct, read, or pnfs resend paths and the only other caller is the write path in nfs_page_async_flush() which already checks and sets the pg_error on the context. Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 3b50edf6f883..5c4568a0804b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1174,10 +1174,6 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, return 1; out_failed: - /* remember fatal errors */ - if (nfs_error_is_fatal(desc->pg_error)) - nfs_context_set_write_error(req->wb_context, - desc->pg_error); nfs_pageio_error_cleanup(desc); return 0; } -- cgit v1.2.3 From 86bbd7422ae6a33735df6846fd685e46686da714 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Tue, 23 Oct 2018 10:34:57 -0500 Subject: NFS: change sign of nfs_fh length The filehandle has a length which is defined as a 32-bit "unsigned integer". Change sign of the length appropriately. Signed-off-by: Frank Sorenson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index b7bde12d8cd5..2fc8f6fa25e4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3516,7 +3516,7 @@ static int decode_attr_exclcreat_supported(struct xdr_stream *xdr, static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh) { __be32 *p; - int len; + u32 len; if (fh != NULL) memset(fh, 0, sizeof(*fh)); -- cgit v1.2.3