summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Aloni <dan.aloni@vastdata.com>2024-05-06 12:37:59 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2024-06-12 12:39:39 +0300
commitc393f7890a90a4c216a121df2974dee98834d922 (patch)
treef5435a509c31f20786ecc0c176f29c10d0654aa3
parentb966ef66b9c978f89b37aaa2cb3caf0f24da7cf5 (diff)
downloadlinux-c393f7890a90a4c216a121df2974dee98834d922.tar.xz
rpcrdma: fix handling for RDMA_CM_EVENT_DEVICE_REMOVAL
[ Upstream commit 4836da219781ec510c4c0303df901aa643507a7a ] Under the scenario of IB device bonding, when bringing down one of the ports, or all ports, we saw xprtrdma entering a non-recoverable state where it is not even possible to complete the disconnect and shut it down the mount, requiring a reboot. Following debug, we saw that transport connect never ended after receiving the RDMA_CM_EVENT_DEVICE_REMOVAL callback. The DEVICE_REMOVAL callback is irrespective of whether the CM_ID is connected, and ESTABLISHED may not have happened. So need to work with each of these states accordingly. Fixes: 2acc5cae2923 ('xprtrdma: Prevent dereferencing r_xprt->rx_ep after it is freed') Cc: Sagi Grimberg <sagi.grimberg@vastdata.com> Signed-off-by: Dan Aloni <dan.aloni@vastdata.com> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--net/sunrpc/xprtrdma/verbs.c6
1 files changed, 5 insertions, 1 deletions
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4f8d7efa469f..432557a553e7 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -244,7 +244,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DEVICE_REMOVAL:
pr_info("rpcrdma: removing device %s for %pISpc\n",
ep->re_id->device->name, sap);
- fallthrough;
+ switch (xchg(&ep->re_connect_status, -ENODEV)) {
+ case 0: goto wake_connect_worker;
+ case 1: goto disconnected;
+ }
+ return 0;
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
goto disconnected;