summaryrefslogtreecommitdiff
path: root/net/sunrpc
diff options
context:
space:
mode:
authorBill Baker <Bill.Baker@Oracle.com>2018-06-20 00:24:58 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2018-09-09 20:55:59 +0300
commitbf23ba3737e097d80363b5f5aea6239a952a36a9 (patch)
tree08223bcf13a7fb96008e5626d1ef807ca71c9c7e /net/sunrpc
parentec13c53dc59f82cfaa71a295430c0005a8836f27 (diff)
downloadlinux-bf23ba3737e097d80363b5f5aea6239a952a36a9.tar.xz
NFSv4 client live hangs after live data migration recovery
commit 0f90be132cbf1537d87a6a8b9e80867adac892f6 upstream. After a live data migration event at the NFS server, the client may send I/O requests to the wrong server, causing a live hang due to repeated recovery events. On the wire, this will appear as an I/O request failing with NFS4ERR_BADSESSION, followed by successful CREATE_SESSION, repeatedly. NFS4ERR_BADSSESSION is returned because the session ID being used was issued by the other server and is not valid at the old server. The failure is caused by async worker threads having cached the transport (xprt) in the rpc_task structure. After the migration recovery completes, the task is redispatched and the task resends the request to the wrong server based on the old value still present in tk_xprt. The solution is to recompute the tk_xprt field of the rpc_task structure so that the request goes to the correct server. Signed-off-by: Bill Baker <bill.baker@oracle.com> Reviewed-by: Chuck Lever <chuck.lever@oracle.com> Tested-by: Helen Chao <helen.chao@oracle.com> Fixes: fb43d17210ba ("SUNRPC: Use the multipath iterator to assign a ...") Cc: stable@vger.kernel.org # v4.9+ Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'net/sunrpc')
-rw-r--r--net/sunrpc/clnt.c28
1 files changed, 20 insertions, 8 deletions
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2ad827db2704..6d118357d9dc 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -965,10 +965,20 @@ out:
}
EXPORT_SYMBOL_GPL(rpc_bind_new_program);
+void rpc_task_release_transport(struct rpc_task *task)
+{
+ struct rpc_xprt *xprt = task->tk_xprt;
+
+ if (xprt) {
+ task->tk_xprt = NULL;
+ xprt_put(xprt);
+ }
+}
+EXPORT_SYMBOL_GPL(rpc_task_release_transport);
+
void rpc_task_release_client(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- struct rpc_xprt *xprt = task->tk_xprt;
if (clnt != NULL) {
/* Remove from client task list */
@@ -979,12 +989,14 @@ void rpc_task_release_client(struct rpc_task *task)
rpc_release_client(clnt);
}
+ rpc_task_release_transport(task);
+}
- if (xprt != NULL) {
- task->tk_xprt = NULL;
-
- xprt_put(xprt);
- }
+static
+void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt)
+{
+ if (!task->tk_xprt)
+ task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
}
static
@@ -992,8 +1004,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
{
if (clnt != NULL) {
- if (task->tk_xprt == NULL)
- task->tk_xprt = xprt_iter_get_next(&clnt->cl_xpi);
+ rpc_task_set_transport(task, clnt);
task->tk_client = clnt;
atomic_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
@@ -1529,6 +1540,7 @@ call_start(struct rpc_task *task)
clnt->cl_program->version[clnt->cl_vers]->counts[idx]++;
clnt->cl_stats->rpccnt++;
task->tk_action = call_reserve;
+ rpc_task_set_transport(task, clnt);
}
/*