From 2a369153c82e0c83621b3e71d8f0c53394705bda Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 13 Aug 2012 18:54:45 -0400 Subject: NFS: Clean up helper function nfs4_select_rw_stateid() We want to be able to pass on the information that the page was not dirtied under a lock. Instead of adding a flag parameter, do this by passing a pointer to a 'struct nfs_lock_owner' that may be NULL. Also reuse this structure in struct nfs_lock_context to carry the fl_owner_t and pid_t. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 4b03f56e280e..869eac0c2635 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -81,12 +81,16 @@ struct nfs_access_entry { int mask; }; +struct nfs_lockowner { + fl_owner_t l_owner; + pid_t l_pid; +}; + struct nfs_lock_context { atomic_t count; struct list_head list; struct nfs_open_context *open_context; - fl_owner_t lockowner; - pid_t pid; + struct nfs_lockowner lockowner; }; struct nfs4_state; -- cgit v1.2.3 From 05990d1bf2708b9e84d67074551f964d3738eedc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Sep 2012 16:01:22 -0400 Subject: NFS: Fix fdatasync/fsync() when confronted with a server reboot If the server reboots before it can commit the unstable writes to disk, then nfs_commit_release_pages() will detect this when it compares the verifier returned by COMMIT to the one returned by WRITE. When this happens, the client needs to resend those writes in order to guarantee that they make it to stable storage. This patch adds a signalling mechanism to notify fsync() that it needs to retry all writes before it can exit. 
Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 34 ++++++++++++++++++++++------------ fs/nfs/nfs4file.c | 22 ++++++++++++---------- fs/nfs/write.c | 1 + include/linux/nfs_fs.h | 1 + 4 files changed, 36 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 6a7fcab7ecb3..cc9b56691bef 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -259,7 +259,7 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) struct dentry *dentry = file->f_path.dentry; struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - int have_error, status; + int have_error, do_resend, status; int ret = 0; dprintk("NFS: fsync file(%s/%s) datasync %d\n", @@ -267,15 +267,23 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync) datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); + do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); status = nfs_commit_inode(inode, FLUSH_SYNC); - if (status >= 0 && ret < 0) - status = ret; have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); - if (have_error) + if (have_error) { ret = xchg(&ctx->error, 0); - if (!ret && status < 0) + if (ret) + goto out; + } + if (status < 0) { ret = status; + goto out; + } + do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); + if (do_resend) + ret = -EAGAIN; +out: return ret; } EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); @@ -286,13 +294,15 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ret; struct inode *inode = file->f_path.dentry->d_inode; - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret != 0) - goto out; - mutex_lock(&inode->i_mutex); - ret = nfs_file_fsync_commit(file, start, end, datasync); - mutex_unlock(&inode->i_mutex); -out: + do { + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret 
!= 0) + break; + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + mutex_unlock(&inode->i_mutex); + } while (ret == -EAGAIN); + return ret; } diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index eb5eb8eef4d3..eef1b38a1b08 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -95,16 +95,18 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) int ret; struct inode *inode = file->f_path.dentry->d_inode; - ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (ret != 0) - goto out; - mutex_lock(&inode->i_mutex); - ret = nfs_file_fsync_commit(file, start, end, datasync); - if (!ret && !datasync) - /* application has asked for meta-data sync */ - ret = pnfs_layoutcommit_inode(inode, true); - mutex_unlock(&inode->i_mutex); -out: + do { + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret != 0) + break; + mutex_lock(&inode->i_mutex); + ret = nfs_file_fsync_commit(file, start, end, datasync); + if (!ret && !datasync) + /* application has asked for meta-data sync */ + ret = pnfs_layoutcommit_inode(inode, true); + mutex_unlock(&inode->i_mutex); + } while (ret == -EAGAIN); + return ret; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e1b5fe4d873a..9347ab7c9574 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1580,6 +1580,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* We have a mismatch. 
Write the page again */ dprintk(" mismatch\n"); nfs_mark_request_dirty(req); + set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags); next: nfs_unlock_and_release_request(req); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 869eac0c2635..383f3313f053 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -103,6 +103,7 @@ struct nfs_open_context { unsigned long flags; #define NFS_CONTEXT_ERROR_WRITE (0) +#define NFS_CONTEXT_RESEND_WRITES (1) int error; struct list_head list; -- cgit v1.2.3 From d19751e7b9bd8a01d00372325439589886674f79 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Sep 2012 17:21:25 -0400 Subject: SUNRPC: Get rid of the redundant xprt->shutdown bit field It is only set after everyone has dereferenced the transport, and serves no useful purpose: setting it is racy, so all the socket code, etc still needs to be able to cope with the cases where they miss reading it. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 3 +-- net/sunrpc/xprt.c | 8 ++------ net/sunrpc/xprtrdma/transport.c | 22 ++++++++-------------- net/sunrpc/xprtsock.c | 18 ------------------ 4 files changed, 11 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bf8c49ff7530..951cb9b7d02b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -173,8 +173,7 @@ struct rpc_xprt { unsigned int min_reqs; /* min number of slots */ atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ - unsigned char shutdown : 1, /* being shut down */ - resvport : 1; /* use a reserved port */ + unsigned char resvport : 1; /* use a reserved port */ unsigned int swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 5d7f61d7559c..bd462a532acf 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -231,7 +231,7 @@ 
EXPORT_SYMBOL_GPL(xprt_reserve_xprt); static void xprt_clear_locked(struct rpc_xprt *xprt) { xprt->snd_task = NULL; - if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) { + if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) { smp_mb__before_clear_bit(); clear_bit(XPRT_LOCKED, &xprt->state); smp_mb__after_clear_bit(); @@ -504,9 +504,6 @@ EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); */ void xprt_write_space(struct rpc_xprt *xprt) { - if (unlikely(xprt->shutdown)) - return; - spin_lock_bh(&xprt->transport_lock); if (xprt->snd_task) { dprintk("RPC: write space: waking waiting task on " @@ -679,7 +676,7 @@ xprt_init_autodisconnect(unsigned long data) struct rpc_xprt *xprt = (struct rpc_xprt *)data; spin_lock(&xprt->transport_lock); - if (!list_empty(&xprt->recv) || xprt->shutdown) + if (!list_empty(&xprt->recv)) goto out_abort; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; @@ -1262,7 +1259,6 @@ out: static void xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); - xprt->shutdown = 1; del_timer_sync(&xprt->timer); rpc_destroy_wait_queue(&xprt->binding); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 5d9202dc7cb1..c9aa7a35f3bf 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -199,21 +199,15 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpc_xprt *xprt = &r_xprt->xprt; int rc = 0; - if (!xprt->shutdown) { - current->flags |= PF_FSTRANS; - xprt_clear_connected(xprt); - - dprintk("RPC: %s: %sconnect\n", __func__, - r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); - rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); - if (rc) - goto out; - } - goto out_clear; + current->flags |= PF_FSTRANS; + xprt_clear_connected(xprt); + + dprintk("RPC: %s: %sconnect\n", __func__, + r_xprt->rx_ep.rep_connected != 0 ? 
"re" : ""); + rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); + if (rc) + xprt_wake_pending_tasks(xprt, rc); -out: - xprt_wake_pending_tasks(xprt, rc); -out_clear: dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); current->flags &= ~PF_FSTRANS; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 97f8918169ed..aaaadfbe36e9 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -917,9 +917,6 @@ static void xs_local_data_ready(struct sock *sk, int len) if (skb == NULL) goto out; - if (xprt->shutdown) - goto dropit; - repsize = skb->len - sizeof(rpc_fraghdr); if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d\n", repsize); @@ -981,9 +978,6 @@ static void xs_udp_data_ready(struct sock *sk, int len) if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) goto out; - if (xprt->shutdown) - goto dropit; - repsize = skb->len - sizeof(struct udphdr); if (repsize < 4) { dprintk("RPC: impossible RPC reply size %d!\n", repsize); @@ -1412,9 +1406,6 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; - if (xprt->shutdown) - goto out; - /* Any data means we had a useful conversation, so * the we don't need to delay the next reconnect */ @@ -1901,9 +1892,6 @@ static void xs_local_setup_socket(struct work_struct *work) struct socket *sock; int status = -EIO; - if (xprt->shutdown) - goto out; - current->flags |= PF_FSTRANS; clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); @@ -2020,9 +2008,6 @@ static void xs_udp_setup_socket(struct work_struct *work) struct socket *sock = transport->sock; int status = -EIO; - if (xprt->shutdown) - goto out; - current->flags |= PF_FSTRANS; /* Start by resetting any existing state */ @@ -2168,9 +2153,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) struct rpc_xprt *xprt = &transport->xprt; int status = -EIO; - if (xprt->shutdown) - goto out; - current->flags |= PF_FSTRANS; if (!sock) { -- cgit 
v1.2.3 From a0b0a6e39bd1bb4a0922086feee73627cbd53ba4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 17 Sep 2012 17:12:15 -0400 Subject: NFS: Clean up the pNFS layoutget interface Ensure that we do return errors from nfs4_proc_layoutget() and that we don't mark the layout as having failed if the error was due to a signal or resource problem on the client side. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 +++++++++----- fs/nfs/pnfs.c | 25 ++++++++++++++++--------- fs/nfs/pnfs.h | 4 ++-- include/linux/nfs_xdr.h | 1 - 4 files changed, 27 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf2fd5d0c1b3..1c8656f8745c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6286,7 +6286,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = { .rpc_release = nfs4_layoutget_release, }; -void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) +struct pnfs_layout_segment * +nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) { struct nfs_server *server = NFS_SERVER(lgp->args.inode); size_t max_pages = max_response_pages(server); @@ -6303,6 +6304,7 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) .callback_data = lgp, .flags = RPC_TASK_ASYNC, }; + struct pnfs_layout_segment *lseg = NULL; int status = 0; dprintk("--> %s\n", __func__); @@ -6310,7 +6312,7 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); if (!lgp->args.layout.pages) { nfs4_layoutget_release(lgp); - return; + return ERR_PTR(-ENOMEM); } lgp->args.layout.pglen = max_pages * PAGE_SIZE; @@ -6319,15 +6321,17 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); task = rpc_run_task(&task_setup_data); if (IS_ERR(task)) - return; + return ERR_CAST(task); status = nfs4_wait_for_completion_rpc_task(task); if 
(status == 0) status = task->tk_status; if (status == 0) - status = pnfs_layout_process(lgp); + lseg = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); - return; + if (status) + return ERR_PTR(status); + return lseg; } static void diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 2e00feacd4be..3a7ac97020df 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -582,7 +582,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); struct nfs4_layoutget *lgp; - struct pnfs_layout_segment *lseg = NULL; + struct pnfs_layout_segment *lseg; dprintk("--> %s\n", __func__); @@ -599,16 +599,22 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); - lgp->lsegpp = &lseg; lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. */ - nfs4_proc_layoutget(lgp, gfp_flags); - if (!lseg) { - /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); + lseg = nfs4_proc_layoutget(lgp, gfp_flags); + if (IS_ERR(lseg)) { + switch (PTR_ERR(lseg)) { + case -ENOMEM: + case -ERESTARTSYS: + break; + default: + /* remember that LAYOUTGET failed and suspend trying */ + set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); + } + return NULL; } return lseg; @@ -1096,7 +1102,7 @@ out_unlock: } EXPORT_SYMBOL_GPL(pnfs_update_layout); -int +struct pnfs_layout_segment * pnfs_layout_process(struct nfs4_layoutget *lgp) { struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; @@ -1129,7 +1135,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } init_lseg(lo, lseg); lseg->pls_range = res->range; - *lgp->lsegpp = get_lseg(lseg); + get_lseg(lseg); pnfs_insert_layout(lo, lseg); if (res->return_on_close) { @@ -1140,8 +1146,9 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) /* Done processing 
layoutget. Set the layout stateid */ pnfs_set_layout_stateid(lo, &res->stateid, false); spin_unlock(&ino->i_lock); + return lseg; out: - return status; + return ERR_PTR(status); out_forget_reply: spin_unlock(&ino->i_lock); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 745aa1b39e7c..d51ef888e71b 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -172,7 +172,7 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server, struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); -extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); +extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ @@ -192,7 +192,7 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); -int pnfs_layout_process(struct nfs4_layoutget *lgp); +struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index be9cf3c7e79e..5da789fdf25b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -251,7 +251,6 @@ struct nfs4_layoutget_res { struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; - struct pnfs_layout_segment **lsegpp; gfp_t gfp_flags; }; -- cgit v1.2.3 From 6168f62cbde8dcf4f58255794efbcdb8df603959 Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Mon, 10 Sep 2012 14:00:46 -0400 Subject: NFSv4: Add ACCESS operation to OPEN compound The OPEN 
operation has no way to differentiate an open for read and an open for execution - both look like read to the server. This allowed users to read files that didn't have READ access but did have EXEC access, which is obviously wrong. This patch adds an ACCESS call to the OPEN compound to handle the difference between OPENs for reading and execution. Since we're going through the trouble of calling ACCESS, we check all possible access bits and cache the results hopefully avoiding an ACCESS call in the future. Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 16 ++++++++++++++- fs/nfs/nfs4proc.c | 52 +++++++++++++++++++++++++++++++++++++++++-------- fs/nfs/nfs4xdr.c | 16 +++++++++++---- include/linux/nfs_fs.h | 2 ++ include/linux/nfs_xdr.h | 3 +++ 5 files changed, 76 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 627f108ede23..ce8cb926526b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2072,7 +2072,7 @@ found: nfs_access_free_entry(entry); } -static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) @@ -2098,6 +2098,20 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s spin_unlock(&nfs_access_lru_lock); } } +EXPORT_SYMBOL_GPL(nfs_access_add_cache); + +void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) +{ + entry->mask = 0; + if (access_result & NFS4_ACCESS_READ) + entry->mask |= MAY_READ; + if (access_result & + (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; +} +EXPORT_SYMBOL_GPL(nfs_access_set_mask); static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) { diff --git 
a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 471a75f11ea2..5b3207f557d9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -104,6 +104,8 @@ static int nfs4_map_errors(int err) return -EACCES; case -NFS4ERR_MINOR_VERS_MISMATCH: return -EPROTONOSUPPORT; + case -NFS4ERR_ACCESS: + return -EACCES; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); @@ -860,6 +862,9 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); + /* ask server to check for all possible rights as results are cached */ + p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); p->o_arg.id.uniquifier = sp->so_seqid.owner_id; @@ -1643,6 +1648,39 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data) return status; } +static int nfs4_opendata_access(struct rpc_cred *cred, + struct nfs4_opendata *opendata, + struct nfs4_state *state, fmode_t fmode) +{ + struct nfs_access_entry cache; + u32 mask; + + /* access call failed or for some reason the server doesn't + * support any access modes -- defer access call until later */ + if (opendata->o_res.access_supported == 0) + return 0; + + mask = 0; + if (fmode & FMODE_READ) + mask |= MAY_READ; + if (fmode & FMODE_WRITE) + mask |= MAY_WRITE; + if (fmode & FMODE_EXEC) + mask |= MAY_EXEC; + + cache.cred = cred; + cache.jiffies = jiffies; + nfs_access_set_mask(&cache, opendata->o_res.access_result); + nfs_access_add_cache(state->inode, &cache); + + if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) + return 0; + + /* even though OPEN succeeded, access is denied. 
Close the file */ + nfs4_close_state(state, fmode); + return -NFS4ERR_ACCESS; +} + /* * Note: On error, nfs4_proc_open will free the struct nfs4_opendata */ @@ -1900,6 +1938,10 @@ static int _nfs4_do_open(struct inode *dir, if (server->caps & NFS_CAP_POSIX_LOCK) set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); + status = nfs4_opendata_access(cred, opendata, state, fmode); + if (status != 0) + goto err_opendata_put; + if (opendata->o_arg.open_flags & O_EXCL) { nfs4_exclusive_attrset(opendata, sattr); @@ -1945,7 +1987,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct nfs4_state *res; int status; - fmode &= FMODE_READ|FMODE_WRITE; + fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC; do { status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res, ctx_th); @@ -2771,13 +2813,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (!status) { - entry->mask = 0; - if (res.access & NFS4_ACCESS_READ) - entry->mask |= MAY_READ; - if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; + nfs_access_set_mask(entry, res.access); nfs_refresh_inode(inode, res.fattr); } nfs_free_fattr(res.fattr); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 7ab29abb3160..657483c34e28 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -447,12 +447,14 @@ static int nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_maxsz + \ + encode_access_maxsz + \ encode_getfh_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ + decode_access_maxsz + \ decode_getfh_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ @@ -467,11 +469,13 @@ static int 
nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_open_maxsz + \ + encode_access_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_open_maxsz + \ + decode_access_maxsz + \ decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ @@ -2220,6 +2224,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); + encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -2256,6 +2261,7 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); + encode_access(xdr, args->access, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -4099,7 +4105,7 @@ out_overflow: return -EIO; } -static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) +static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access) { __be32 *p; uint32_t supp, acc; @@ -4113,8 +4119,8 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) goto out_overflow; supp = be32_to_cpup(p++); acc = be32_to_cpup(p); - access->supported = supp; - access->access = acc; + *supported = supp; + *access = acc; return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -5892,7 +5898,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_putfh(xdr); if (status != 0) goto out; - status = decode_access(xdr, res); + status = decode_access(xdr, &res->supported, &res->access); if (status != 0) goto out; decode_getfattr(xdr, res->fattr, res->server); @@ -6233,6 +6239,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, 
status = decode_getfh(xdr, &res->fh); if (status) goto out; + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; @@ -6281,6 +6288,7 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, status = decode_open(xdr, res); if (status) goto out; + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 383f3313f053..334a2f5f6bf1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -360,6 +360,8 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 5da789fdf25b..655490dae953 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -334,6 +334,7 @@ struct nfs_openargs { struct nfs_seqid * seqid; int open_flags; fmode_t fmode; + u32 access; __u64 clientid; struct stateowner_id id; union { @@ -368,6 +369,8 @@ struct nfs_openres { struct nfs4_string *owner; struct nfs4_string *group_owner; struct nfs4_sequence_res seq_res; + __u32 access_supported; + __u32 access_result; }; /* -- cgit v1.2.3 From ba9b584c1dc37851d9c6ca6d0d2ccba55d9aad04 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:02 -0400 Subject: SUNRPC: Introduce rpc_clone_client_set_auth() An ULP is supposed to be 
able to replace a GSS rpc_auth object with another GSS rpc_auth object using rpcauth_create(). However, rpcauth_create() in 3.5 reliably fails with -EEXIST in this case. This is because when gss_create() attempts to create the upcall pipes, sometimes they are already there. For example if a pipe FS mount event occurs, or a previous GSS flavor was in use for this rpc_clnt. It turns out that's not the only problem here. While working on a fix for the above problem, we noticed that replacing an rpc_clnt's rpc_auth is not safe, since dereferencing the cl_auth field is not protected in any way. So we're deprecating the ability of rpcauth_create() to switch an rpc_clnt's security flavor during normal operation. Instead, let's add a fresh API that clones an rpc_clnt and gives the clone a new flavor before it's used. This makes immediate use of the new __rpc_clone_client() helper. This can be used in a similar fashion to rpcauth_create() when a client is hunting for the correct security flavor. Instead of replacing an rpc_clnt's security flavor in a loop, the ULP replaces the whole rpc_clnt. To fix the -EEXIST problem, any ULP logic that relies on replacing an rpc_clnt's rpc_auth with rpcauth_create() must be changed to use this API instead. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 13 ++----------- fs/nfs/nfs4namespace.c | 14 +------------- include/linux/sunrpc/clnt.h | 2 ++ net/sunrpc/clnt.c | 22 ++++++++++++++++++++++ 4 files changed, 27 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 99694442b93f..143149db3440 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -668,7 +668,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server, { struct nfs_client *clp = server->nfs_client; - server->client = rpc_clone_client(clp->cl_rpcclient); + server->client = rpc_clone_client_set_auth(clp->cl_rpcclient, + pseudoflavour); if (IS_ERR(server->client)) { dprintk("%s: couldn't create rpc_client!\n", __func__); return PTR_ERR(server->client); @@ -678,16 +679,6 @@ int nfs_init_server_rpcclient(struct nfs_server *server, timeo, sizeof(server->client->cl_timeout_default)); server->client->cl_timeout = &server->client->cl_timeout_default; - - if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) { - struct rpc_auth *auth; - - auth = rpcauth_create(pseudoflavour, server->client); - if (IS_ERR(auth)) { - dprintk("%s: couldn't create credcache!\n", __func__); - return PTR_ERR(auth); - } - } server->client->cl_softrtry = 0; if (server->flags & NFS_MOUNT_SOFT) server->client->cl_softrtry = 1; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 4fdeb1b7042e..79fbb61ce202 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -192,25 +192,13 @@ out: struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, struct qstr *name) { - struct rpc_clnt *clone; - struct rpc_auth *auth; rpc_authflavor_t flavor; flavor = nfs4_negotiate_security(inode, name); if ((int)flavor < 0) return ERR_PTR((int)flavor); - clone = rpc_clone_client(clnt); - if (IS_ERR(clone)) - return clone; - - auth = rpcauth_create(flavor, clone); - if (IS_ERR(auth)) { - rpc_shutdown_client(clone); - 
clone = ERR_PTR(-EIO); - } - - return clone; + return rpc_clone_client_set_auth(clnt, flavor); } static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 523547ecfee2..34206b84d8da 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -130,6 +130,8 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, const struct rpc_program *, u32); void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); +struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *, + rpc_authflavor_t); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); void rpc_task_release_client(struct rpc_task *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index afbeefab6600..cdc7564b4512 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -548,6 +548,28 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_clone_client); +/** + * rpc_clone_client_set_auth - Clone an RPC client structure and set its auth + * + * @clnt: RPC client whose parameters are copied + * @flavor: security flavor for new client + * + * Returns a fresh RPC client or an ERR_PTR. + */ +struct rpc_clnt * +rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) +{ + struct rpc_create_args args = { + .program = clnt->cl_program, + .prognumber = clnt->cl_prog, + .version = clnt->cl_vers, + .authflavor = flavor, + .client_name = clnt->cl_principal, + }; + return __rpc_clone_client(&args, clnt); +} +EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth); + /* * Kill all tasks for the given client. * XXX: kill their descendants as well?
-- cgit v1.2.3 From 896526174ce2b6a773e187ebe5a047b68230e2c4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:11 -0400 Subject: NFS: Introduce "migration" mount option Currently, the Linux client uses a unique nfs_client_id4.id string when identifying itself to distinct NFS servers. To support transparent state migration, the Linux client will have to use the same nfs_client_id4 string for all servers it communicates with (also known as the "uniform client string" approach). Otherwise NFS servers can not recognize that open and lock state need to be merged after a file system transition. Unfortunately, there are some NFSv4.0 servers currently in the field that do not tolerate the uniform client string approach. Thus, by default, our NFSv4.0 mounts will continue to use the current approach, and we introduce a mount option that switches them to use the uniform model. Client administrators must identify which servers can be mounted with this option. Eventually most NFSv4.0 servers will be able to handle the uniform approach, and we can change the default. The first mount of a server controls the behavior for all subsequent mounts for the lifetime of that set of mounts of that server. After the last mount of that server is gone, the client erases the data structure that tracks the lease. A subsequent lease may then honor a different "migration" setting. This patch adds only the infrastructure for parsing the new mount option. Support for uniform client strings is added in a subsequent patch. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 ++ fs/nfs/super.c | 20 ++++++++++++++++++++ include/linux/nfs_fs_sb.h | 2 ++ 3 files changed, 24 insertions(+) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 143149db3440..92aed2e08bd5 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -752,6 +752,8 @@ static int nfs_init_server(struct nfs_server *server, data->timeo, data->retrans); if (data->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + if (server->options & NFS_OPTION_MIGRATION) + set_bit(NFS_CS_MIGRATION, &cl_init.init_flags); /* Allocate or find a client reference we can use */ clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b8eda700584b..056138d45c11 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -88,6 +88,7 @@ enum { Opt_sharecache, Opt_nosharecache, Opt_resvport, Opt_noresvport, Opt_fscache, Opt_nofscache, + Opt_migration, Opt_nomigration, /* Mount options that take integer arguments */ Opt_port, @@ -147,6 +148,8 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_noresvport, "noresvport" }, { Opt_fscache, "fsc" }, { Opt_nofscache, "nofsc" }, + { Opt_migration, "migration" }, + { Opt_nomigration, "nomigration" }, { Opt_port, "port=%s" }, { Opt_rsize, "rsize=%s" }, @@ -676,6 +679,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); + if (nfss->options & NFS_OPTION_MIGRATION) + seq_printf(m, ",migration"); + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) seq_printf(m, ",lookupcache=none"); @@ -1243,6 +1249,12 @@ static int nfs_parse_mount_options(char *raw, kfree(mnt->fscache_uniq); mnt->fscache_uniq = NULL; break; + case Opt_migration: + mnt->options |= NFS_OPTION_MIGRATION; + break; + case Opt_nomigration: + mnt->options &= 
NFS_OPTION_MIGRATION; + break; /* * options that take numeric values @@ -1535,6 +1547,10 @@ static int nfs_parse_mount_options(char *raw, if (mnt->minorversion && mnt->version != 4) goto out_minorversion_mismatch; + if (mnt->options & NFS_OPTION_MIGRATION && + mnt->version != 4 && mnt->minorversion != 0) + goto out_migration_misuse; + /* * verify that any proto=/mountproto= options match the address * familiies in the addr=/mountaddr= options. @@ -1572,6 +1588,10 @@ out_minorversion_mismatch: printk(KERN_INFO "NFS: mount option vers=%u does not support " "minorversion=%u\n", mnt->version, mnt->minorversion); return 0; +out_migration_misuse: + printk(KERN_INFO + "NFS: 'migration' not supported for this NFS version\n"); + return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); return 0; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 310c63c8ab2c..2e22fc7e47cf 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -39,6 +39,7 @@ struct nfs_client { unsigned long cl_flags; /* behavior switches */ #define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */ #define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */ +#define NFS_CS_MIGRATION 2 /* - transparent state migr */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ @@ -125,6 +126,7 @@ struct nfs_server { unsigned int namelen; unsigned int options; /* extra options enabled by mount */ #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ +#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ -- cgit v1.2.3 From 05f4c350ee02e9461c6ae3a880ea326a06835e37 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 14 Sep 2012 17:24:32 -0400 Subject: NFS: Discover NFSv4 server trunking when mounting "Server trunking" is a fancy name for a multi-homed NFS server.
Trunking might occur if a client sends NFS requests for a single workload to multiple network interfaces on the same server. There are some implications for NFSv4 state management that make it useful for a client to know if a single NFSv4 server instance is multi-homed. (Note this is only a consideration for NFSv4, not for legacy versions of NFS, which are stateless). If a client cares about server trunking, no NFSv4 operations can proceed until that client determines who it is talking to. Thus server IP trunking discovery must be done when the client first encounters an unfamiliar server IP address. The nfs_get_client() function walks the nfs_client_list and matches on server IP address. The outcome of that walk tells us immediately if we have an unfamiliar server IP address. It invokes nfs_init_client() in this case. Thus, nfs4_init_client() is a good spot to perform trunking discovery. Discovery requires a client to establish a fresh client ID, so our client will now send SETCLIENTID or EXCHANGE_ID as the first NFS operation after a successful ping, rather than waiting for an application to perform an operation that requires NFSv4 state. The exact process for detecting trunking is different for NFSv4.0 and NFSv4.1, so a minorversion-specific init_client callout method is introduced. CLID_INUSE recovery is important for the trunking discovery process. CLID_INUSE is a sign the server recognizes the client's nfs_client_id4 id string, but the client is using the wrong principal this time for the SETCLIENTID operation. The SETCLIENTID must be retried with a series of different principals until one works, and then the rest of trunking discovery can proceed. 
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 3 +- fs/nfs/internal.h | 6 ++ fs/nfs/nfs4_fs.h | 8 ++ fs/nfs/nfs4client.c | 253 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 4 + fs/nfs/nfs4state.c | 182 ++++++++++++++++++++++++++++++++- include/linux/nfs_fs_sb.h | 1 + 7 files changed, 455 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 92aed2e08bd5..57d2a5c3d933 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -498,7 +498,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init, return nfs_found_client(cl_init, clp); } if (new) { - list_add(&new->cl_share_link, &nn->nfs_client_list); + list_add_tail(&new->cl_share_link, + &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); new->cl_flags = cl_init->init_flags; return rpc_ops->init_client(new, timeparms, ip_addr, diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 89560be07e4a..89a795dc3027 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -483,6 +483,12 @@ extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply); +extern int nfs40_walk_client_list(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred); +extern int nfs41_walk_client_list(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred); /* * Determine the device name as a string diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9cacc131a8a4..832503c7a00e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -191,6 +191,8 @@ struct nfs4_state_recovery_ops { int (*establish_clid)(struct nfs_client *, struct rpc_cred *); struct rpc_cred * (*get_clid_cred)(struct nfs_client *); int (*reclaim_complete)(struct nfs_client *); + int (*detect_trunking)(struct nfs_client *, struct nfs_client **, + struct rpc_cred *); }; struct nfs4_state_maintenance_ops { @@ -320,9 +322,15 @@ extern void 
nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); +int nfs4_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **); +int nfs40_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **, struct rpc_cred *); #if defined(CONFIG_NFS_V4_1) struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); +int nfs41_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **, struct rpc_cred *); extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); #else static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 612f5ebaabac..14ddd4d30966 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -185,6 +185,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, rpc_authflavor_t authflavour) { char buf[INET6_ADDRSTRLEN + 1]; + struct nfs_client *old; int error; if (clp->cl_cons_state == NFS_CS_READY) { @@ -230,6 +231,17 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, if (!nfs4_has_session(clp)) nfs_mark_client_ready(clp, NFS_CS_READY); + + error = nfs4_discover_server_trunking(clp, &old); + if (error < 0) + goto error; + if (clp != old) { + clp->cl_preserve_clid = true; + nfs_put_client(clp); + clp = old; + atomic_inc(&clp->cl_count); + } + return clp; error: @@ -239,6 +251,247 @@ error: return ERR_PTR(error); } +/* + * Returns true if the client IDs match + */ +static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b) +{ + if (a->cl_clientid != b->cl_clientid) { + dprintk("NFS: --> %s client ID %llx does not match %llx\n", + __func__, a->cl_clientid, b->cl_clientid); + return false; + } + dprintk("NFS: --> %s client ID %llx matches %llx\n", + __func__, 
a->cl_clientid, b->cl_clientid); + return true; +} + +/* + * SETCLIENTID just did a callback update with the callback ident in + * "drop," but server trunking discovery claims "drop" and "keep" are + * actually the same server. Swap the callback IDs so that "keep" + * will continue to use the callback ident the server now knows about, + * and so that "keep"'s original callback ident is destroyed when + * "drop" is freed. + */ +static void nfs4_swap_callback_idents(struct nfs_client *keep, + struct nfs_client *drop) +{ + struct nfs_net *nn = net_generic(keep->cl_net, nfs_net_id); + unsigned int save = keep->cl_cb_ident; + + if (keep->cl_cb_ident == drop->cl_cb_ident) + return; + + dprintk("%s: keeping callback ident %u and dropping ident %u\n", + __func__, keep->cl_cb_ident, drop->cl_cb_ident); + + spin_lock(&nn->nfs_client_lock); + + idr_replace(&nn->cb_ident_idr, keep, drop->cl_cb_ident); + keep->cl_cb_ident = drop->cl_cb_ident; + + idr_replace(&nn->cb_ident_idr, drop, save); + drop->cl_cb_ident = save; + + spin_unlock(&nn->nfs_client_lock); +} + +/** + * nfs40_walk_client_list - Find server that recognizes a client ID + * + * @new: nfs_client with client ID to test + * @result: OUT: found nfs_client, or new + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in "result." + * + * NB: nfs40_walk_client_list() relies on the new nfs_client being + * the last nfs_client on the list. 
+ */ +int nfs40_walk_client_list(struct nfs_client *new, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); + struct nfs_client *pos, *n, *prev = NULL; + struct nfs4_setclientid_res clid = { + .clientid = new->cl_clientid, + .confirm = new->cl_confirm, + }; + int status; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos" */ + if (pos->cl_cons_state < NFS_CS_READY) + continue; + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + + if (pos->cl_clientid != new->cl_clientid) + continue; + + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + + status = nfs4_proc_setclientid_confirm(pos, &clid, cred); + if (status == 0) { + nfs4_swap_callback_idents(pos, new); + + nfs_put_client(pos); + *result = pos; + dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", + __func__, pos, atomic_read(&pos->cl_count)); + return 0; + } + if (status != -NFS4ERR_STALE_CLIENTID) { + nfs_put_client(pos); + dprintk("NFS: <-- %s status = %d, no result\n", + __func__, status); + return status; + } + + spin_lock(&nn->nfs_client_lock); + prev = pos; + } + + /* + * No matching nfs_client found. This should be impossible, + * because the new nfs_client has already been added to + * nfs_client_list by nfs_get_client(). + * + * Don't BUG(), since the caller is holding a mutex. 
+ */ + if (prev) + nfs_put_client(prev); + spin_unlock(&nn->nfs_client_lock); + pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); + return -NFS4ERR_STALE_CLIENTID; +} + +#ifdef CONFIG_NFS_V4_1 +/* + * Returns true if the server owners match + */ +static bool +nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b) +{ + struct nfs41_server_owner *o1 = a->cl_serverowner; + struct nfs41_server_owner *o2 = b->cl_serverowner; + + if (o1->minor_id != o2->minor_id) { + dprintk("NFS: --> %s server owner minor IDs do not match\n", + __func__); + return false; + } + + if (o1->major_id_sz != o2->major_id_sz) + goto out_major_mismatch; + if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) + goto out_major_mismatch; + + dprintk("NFS: --> %s server owners match\n", __func__); + return true; + +out_major_mismatch: + dprintk("NFS: --> %s server owner major IDs do not match\n", + __func__); + return false; +} + +/** + * nfs41_walk_client_list - Find nfs_client that matches a client/server owner + * + * @new: nfs_client with client ID to test + * @result: OUT: found nfs_client, or new + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in "result." + * + * NB: nfs41_walk_client_list() relies on the new nfs_client being + * the last nfs_client on the list. + */ +int nfs41_walk_client_list(struct nfs_client *new, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); + struct nfs_client *pos, *n, *prev = NULL; + int error; + + spin_lock(&nn->nfs_client_lock); + list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { + /* If "pos" isn't marked ready, we can't trust the + * remaining fields in "pos", especially the client + * ID and serverowner fields. Wait for CREATE_SESSION + * to finish. 
*/ + if (pos->cl_cons_state < NFS_CS_READY) { + atomic_inc(&pos->cl_count); + spin_unlock(&nn->nfs_client_lock); + + if (prev) + nfs_put_client(prev); + prev = pos; + + error = nfs_wait_client_init_complete(pos); + if (error < 0) { + nfs_put_client(pos); + continue; + } + + spin_lock(&nn->nfs_client_lock); + } + + if (pos->rpc_ops != new->rpc_ops) + continue; + + if (pos->cl_proto != new->cl_proto) + continue; + + if (pos->cl_minorversion != new->cl_minorversion) + continue; + + if (!nfs4_match_clientids(pos, new)) + continue; + + if (!nfs4_match_serverowners(pos, new)) + continue; + + spin_unlock(&nn->nfs_client_lock); + dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", + __func__, pos, atomic_read(&pos->cl_count)); + + *result = pos; + return 0; + } + + /* + * No matching nfs_client found. This should be impossible, + * because the new nfs_client has already been added to + * nfs_client_list by nfs_get_client(). + * + * Don't BUG(), since the caller is holding a mutex. + */ + spin_unlock(&nn->nfs_client_lock); + pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); + return -NFS4ERR_STALE_CLIENTID; +} +#endif /* CONFIG_NFS_V4_1 */ + static void nfs4_destroy_server(struct nfs_server *server) { nfs_server_return_all_delegations(server); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 461411171966..b5834abfcbff 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5458,6 +5458,8 @@ int nfs4_destroy_clientid(struct nfs_client *clp) goto out; if (clp->cl_exchange_flags == 0) goto out; + if (clp->cl_preserve_clid) + goto out; cred = nfs4_get_exchange_id_cred(clp); ret = nfs4_proc_destroy_clientid(clp, cred); if (cred) @@ -6871,6 +6873,7 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs4_init_clientid, .get_clid_cred = nfs4_get_setclientid_cred, + .detect_trunking = nfs40_discover_server_trunking, }; #if defined(CONFIG_NFS_V4_1) @@ -6882,6 +6885,7 @@ static 
const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, .reclaim_complete = nfs41_proc_reclaim_complete, + .detect_trunking = nfs41_discover_server_trunking, }; #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 38eeefd95375..5c4286643701 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -51,6 +51,8 @@ #include #include +#include + #include "nfs4_fs.h" #include "callback.h" #include "delegation.h" @@ -63,7 +65,7 @@ #define OPENOWNER_POOL_SIZE 8 const nfs4_stateid zero_stateid; - +static DEFINE_MUTEX(nfs_clid_init_mutex); static LIST_HEAD(nfs4_clientid_list); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) @@ -98,6 +100,55 @@ out: return status; } +/** + * nfs40_discover_server_trunking - Detect server IP address trunking (mv0) + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * @cred: credential to use for trunking test + * + * Returns zero, a negative errno, or a negative NFS4ERR status. + * If zero is returned, an nfs_client pointer is planted in + * "result". + * + * Note: The returned client may not yet be marked ready. + */ +int nfs40_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred) +{ + struct nfs4_setclientid_res clid = { + .clientid = clp->cl_clientid, + .confirm = clp->cl_confirm, + }; + unsigned short port; + int status; + + port = nfs_callback_tcpport; + if (clp->cl_addr.ss_family == AF_INET6) + port = nfs_callback_tcpport6; + + status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); + if (status != 0) + goto out; + clp->cl_clientid = clid.clientid; + clp->cl_confirm = clid.confirm; + + status = nfs40_walk_client_list(clp, result, cred); + switch (status) { + case -NFS4ERR_STALE_CLIENTID: + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + case 0: + /* Sustain the lease, even if it's empty. 
If the clientid4 + * goes stale it's of no use for trunking discovery. */ + nfs4_schedule_state_renewal(*result); + break; + } + +out: + return status; +} + struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) { struct rpc_cred *cred = NULL; @@ -277,6 +328,32 @@ out: return status; } +/** + * nfs41_discover_server_trunking - Detect server IP address trunking (mv1) + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * @cred: credential to use for trunking test + * + * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status. + * If NFS4_OK is returned, an nfs_client pointer is planted in + * "result". + * + * Note: The returned client may not yet be marked ready. + */ +int nfs41_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result, + struct rpc_cred *cred) +{ + int status; + + status = nfs4_proc_exchange_id(clp, cred); + if (status != NFS4_OK) + return status; + + return nfs41_walk_client_list(clp, result, cred); +} + struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) { struct rpc_cred *cred; @@ -1705,6 +1782,109 @@ static int nfs4_purge_lease(struct nfs_client *clp) return 0; } +/** + * nfs4_discover_server_trunking - Detect server IP address trunking + * + * @clp: nfs_client under test + * @result: OUT: found nfs_client, or clp + * + * Returns zero or a negative errno. If zero is returned, + * an nfs_client pointer is planted in "result". + * + * Note: since we are invoked in process context, and + * not from inside the state manager, we cannot use + * nfs4_handle_reclaim_lease_error(). 
+ */ +int nfs4_discover_server_trunking(struct nfs_client *clp, + struct nfs_client **result) +{ + const struct nfs4_state_recovery_ops *ops = + clp->cl_mvops->reboot_recovery_ops; + rpc_authflavor_t *flavors, flav, save; + struct rpc_clnt *clnt; + struct rpc_cred *cred; + int i, len, status; + + dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname); + + len = NFS_MAX_SECFLAVORS; + flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL); + if (flavors == NULL) { + status = -ENOMEM; + goto out; + } + len = rpcauth_list_flavors(flavors, len); + if (len < 0) { + status = len; + goto out_free; + } + clnt = clp->cl_rpcclient; + save = clnt->cl_auth->au_flavor; + i = 0; + + mutex_lock(&nfs_clid_init_mutex); + status = -ENOENT; +again: + cred = ops->get_clid_cred(clp); + if (cred == NULL) + goto out_unlock; + + status = ops->detect_trunking(clp, result, cred); + put_rpccred(cred); + switch (status) { + case 0: + break; + + case -EACCES: + if (clp->cl_machine_cred == NULL) + break; + /* Handle case where the user hasn't set up machine creds */ + nfs4_clear_machine_cred(clp); + case -NFS4ERR_DELAY: + case -ETIMEDOUT: + case -EAGAIN: + ssleep(1); + dprintk("NFS: %s after status %d, retrying\n", + __func__, status); + goto again; + + case -NFS4ERR_CLID_INUSE: + case -NFS4ERR_WRONGSEC: + status = -EPERM; + if (i >= len) + break; + + flav = flavors[i++]; + if (flav == save) + flav = flavors[i++]; + clnt = rpc_clone_client_set_auth(clnt, flav); + if (IS_ERR(clnt)) { + status = PTR_ERR(clnt); + break; + } + clp->cl_rpcclient = clnt; + goto again; + + case -NFS4ERR_MINOR_VERS_MISMATCH: + status = -EPROTONOSUPPORT; + break; + + case -EKEYEXPIRED: + nfs4_warn_keyexpired(clp->cl_hostname); + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + status = -EKEYEXPIRED; + } + +out_unlock: + mutex_unlock(&nfs_clid_init_mutex); +out_free: + kfree(flavors); +out: + dprintk("NFS: %s: status = %d\n", __func__, status); + return status; +} + #ifdef 
CONFIG_NFS_V4_1 void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 2e22fc7e47cf..a9e76ee1adca 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -82,6 +82,7 @@ struct nfs_client { /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; struct nfs4_session *cl_session; /* shared session */ + bool cl_preserve_clid; struct nfs41_server_owner *cl_serverowner; struct nfs41_server_scope *cl_serverscope; struct nfs41_impl_id *cl_implid; -- cgit v1.2.3 From ae2bb03236fc978bdf673c19d39832500793b83c Mon Sep 17 00:00:00 2001 From: Weston Andros Adamson Date: Tue, 2 Oct 2012 14:49:52 -0700 Subject: NFSv4: don't put ACCESS in OPEN compound if O_EXCL Don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS will return permission denied for all bits until close. Fixes a regression due to commit 6168f62c (NFSv4: Add ACCESS operation to OPEN compound) Signed-off-by: Weston Andros Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++++--- fs/nfs/nfs4xdr.c | 12 ++++++++---- include/linux/nfs_xdr.h | 1 + 3 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ccada6856f0d..21cfac7c2ff8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -862,9 +862,15 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.fh = NFS_FH(dir); p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); - /* ask server to check for all possible rights as results are cached */ - p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | - NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; + /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS + * will return permission denied for all bits until close */ + if (!(flags & O_EXCL)) { + /* ask server to check for all possible rights as results + * 
are cached */ + p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY | + NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE; + p->o_res.access_request = p->o_arg.access; + } p->o_arg.clientid = server->nfs_client->cl_clientid; p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); p->o_arg.id.uniquifier = sp->so_seqid.owner_id; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 657483c34e28..0d6030510fe2 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2224,7 +2224,8 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); - encode_access(xdr, args->access, &hdr); + if (args->access) + encode_access(xdr, args->access, &hdr); encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -2261,7 +2262,8 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); - encode_access(xdr, args->access, &hdr); + if (args->access) + encode_access(xdr, args->access, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -6239,7 +6241,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_getfh(xdr, &res->fh); if (status) goto out; - decode_access(xdr, &res->access_supported, &res->access_result); + if (res->access_request) + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; @@ -6288,7 +6291,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, status = decode_open(xdr, res); if (status) goto out; - decode_access(xdr, &res->access_supported, &res->access_result); + if (res->access_request) + decode_access(xdr, &res->access_supported, &res->access_result); decode_getfattr(xdr, res->f_attr, res->server); out: return status; diff --git a/include/linux/nfs_xdr.h 
b/include/linux/nfs_xdr.h index 655490dae953..a73ea89789d1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -369,6 +369,7 @@ struct nfs_openres { struct nfs4_string *owner; struct nfs4_string *group_owner; struct nfs4_sequence_res seq_res; + __u32 access_request; __u32 access_supported; __u32 access_result; }; -- cgit v1.2.3