From 0550d4604e2ca4e653dc13f0c009fc42106b6bfc Mon Sep 17 00:00:00 2001 From: Shigeru Yoshida Date: Wed, 8 Nov 2023 23:31:13 +0900 Subject: RDMA/core: Fix uninit-value access in ib_get_eth_speed() KMSAN reported the following uninit-value access issue: lo speed is unknown, defaulting to 1000 ===================================================== BUG: KMSAN: uninit-value in ib_get_width_and_speed drivers/infiniband/core/verbs.c:1889 [inline] BUG: KMSAN: uninit-value in ib_get_eth_speed+0x546/0xaf0 drivers/infiniband/core/verbs.c:1998 ib_get_width_and_speed drivers/infiniband/core/verbs.c:1889 [inline] ib_get_eth_speed+0x546/0xaf0 drivers/infiniband/core/verbs.c:1998 siw_query_port drivers/infiniband/sw/siw/siw_verbs.c:173 [inline] siw_get_port_immutable+0x6f/0x120 drivers/infiniband/sw/siw/siw_verbs.c:203 setup_port_data drivers/infiniband/core/device.c:848 [inline] setup_device drivers/infiniband/core/device.c:1244 [inline] ib_register_device+0x1589/0x1df0 drivers/infiniband/core/device.c:1383 siw_device_register drivers/infiniband/sw/siw/siw_main.c:72 [inline] siw_newlink+0x129e/0x13d0 drivers/infiniband/sw/siw/siw_main.c:490 nldev_newlink+0x8fd/0xa60 drivers/infiniband/core/nldev.c:1763 rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] rdma_nl_rcv+0xe8a/0x1120 drivers/infiniband/core/netlink.c:259 netlink_unicast_kernel net/netlink/af_netlink.c:1342 [inline] netlink_unicast+0xf4b/0x1230 net/netlink/af_netlink.c:1368 netlink_sendmsg+0x1242/0x1420 net/netlink/af_netlink.c:1910 sock_sendmsg_nosec net/socket.c:730 [inline] __sock_sendmsg net/socket.c:745 [inline] ____sys_sendmsg+0x997/0xd60 net/socket.c:2588 ___sys_sendmsg+0x271/0x3b0 net/socket.c:2642 __sys_sendmsg net/socket.c:2671 [inline] __do_sys_sendmsg net/socket.c:2680 [inline] __se_sys_sendmsg net/socket.c:2678 [inline] __x64_sys_sendmsg+0x2fa/0x4a0 net/socket.c:2678 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b Local variable lksettings created at: ib_get_eth_speed+0x4b/0xaf0 drivers/infiniband/core/verbs.c:1974 siw_query_port drivers/infiniband/sw/siw/siw_verbs.c:173 [inline] siw_get_port_immutable+0x6f/0x120 drivers/infiniband/sw/siw/siw_verbs.c:203 CPU: 0 PID: 11257 Comm: syz-executor.1 Not tainted 6.6.0-14500-g1c41041124bd #10 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014 ===================================================== If __ethtool_get_link_ksettings() fails, `netdev_speed` is set to the default value, SPEED_1000. In this case, if `lanes` field of struct ethtool_link_ksettings is not initialized, an uninitialized value is passed to ib_get_width_and_speed(). This causes the above issue. This patch resolves the issue by initializing `lanes` to 0. Fixes: cb06b6b3f6cb ("RDMA/core: Get IB width and speed from netdev") Signed-off-by: Shigeru Yoshida Link: https://lore.kernel.org/r/20231108143113.1360567-1-syoshida@redhat.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 8a6da87f464b..94a7f3b0c71c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1971,7 +1971,7 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) int rc; u32 netdev_speed; struct net_device *netdev; - struct ethtool_link_ksettings lksettings; + struct ethtool_link_ksettings lksettings = {}; if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET) return -EINVAL; -- cgit v1.2.3 From efb9cbf66440482ceaa90493d648226ab7ec2ebf Mon Sep 17 00:00:00 2001 From: Junxian Huang Date: Sat, 28 Oct 2023 17:32:42 +0800 Subject: RDMA/hns: Fix unnecessary err return when using invalid congest control algorithm Add a default congest control algorithm so that driver won't return an error when the configured algorithm is invalid. Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Junxian Huang Link: https://lore.kernel.org/r/20231028093242.670325-1-huangjunxian6@hisilicon.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0cd2612a4987..2bca9560f32d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4760,10 +4760,15 @@ static int check_cong_type(struct ib_qp *ibqp, cong_alg->wnd_mode_sel = WND_LIMIT; break; default: - ibdev_err(&hr_dev->ib_dev, - "error type(%u) for congestion selection.\n", - hr_dev->caps.cong_type); - return -EINVAL; + ibdev_warn(&hr_dev->ib_dev, + "invalid type(%u) for congestion selection.\n", + hr_dev->caps.cong_type); + hr_dev->caps.cong_type = CONG_TYPE_DCQCN; + cong_alg->alg_sel = CONG_DCQCN; + cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL; + cong_alg->dip_vld = DIP_INVALID; + cong_alg->wnd_mode_sel = WND_LIMIT; + break; } return 0; -- cgit v1.2.3 From ba12ab66aa83a2340a51ad6e74b284269745138c Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 14 Nov 2023 11:02:45 -0600 Subject: RDMA/irdma: Do not modify to SQD on error Remove the modify to SQD before going to ERROR state. It is not needed. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20231114170246.238-2-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/verbs.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 2138f0a2ff85..36e69e6ca9f8 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1424,13 +1424,6 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, case IB_QPS_SQE: case IB_QPS_ERR: case IB_QPS_RESET: - if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS) { - spin_unlock_irqrestore(&iwqp->lock, flags); - info.next_iwarp_state = IRDMA_QP_STATE_SQD; - irdma_hw_modify_qp(iwdev, iwqp, &info, true); - spin_lock_irqsave(&iwqp->lock, flags); - } - if (iwqp->iwarp_state == IRDMA_QP_STATE_ERROR) { spin_unlock_irqrestore(&iwqp->lock, flags); if (udata && udata->inlen) { -- cgit v1.2.3 From bd6da690c27d75cae432c09162d054b34fa2156f Mon Sep 17 00:00:00 2001 From: Mustafa Ismail Date: Tue, 14 Nov 2023 11:02:46 -0600 Subject: RDMA/irdma: Add wait for suspend on SQD Currently, there is no wait for the QP suspend to complete on a modify to SQD state. Add a wait, after the modify to SQD state, for the Suspend Complete AE. While we are at it, update the suspend timeout value in irdma_prep_tc_change to use IRDMA_EVENT_TIMEOUT_MS too. Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") Signed-off-by: Mustafa Ismail Signed-off-by: Shiraz Saleem Link: https://lore.kernel.org/r/20231114170246.238-3-shiraz.saleem@intel.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 6 +++++- drivers/infiniband/hw/irdma/main.c | 2 +- drivers/infiniband/hw/irdma/main.h | 2 +- drivers/infiniband/hw/irdma/verbs.c | 21 +++++++++++++++++++++ drivers/infiniband/hw/irdma/verbs.h | 1 + 5 files changed, 29 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 8fa7e4a18e73..74f6bc7b7ad1 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -321,7 +321,11 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) break; case IRDMA_AE_QP_SUSPEND_COMPLETE: if (iwqp->iwdev->vsi.tc_change_pending) { - atomic_dec(&iwqp->sc_qp.vsi->qp_suspend_reqs); + if (!atomic_dec_return(&qp->vsi->qp_suspend_reqs)) + wake_up(&iwqp->iwdev->suspend_wq); + } + if (iwqp->suspend_pending) { + iwqp->suspend_pending = false; wake_up(&iwqp->iwdev->suspend_wq); } break; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 9ac48b4dab41..3f13200ff71b 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -48,7 +48,7 @@ static void irdma_prep_tc_change(struct irdma_device *iwdev) /* Wait for all qp's to suspend */ wait_event_timeout(iwdev->suspend_wq, !atomic_read(&iwdev->vsi.qp_suspend_reqs), - IRDMA_EVENT_TIMEOUT); + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS)); irdma_ws_reset(&iwdev->vsi); } diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index d66d87bb8bc4..b65bc2ea542f 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -78,7 +78,7 @@ extern struct auxiliary_driver i40iw_auxiliary_drv; #define MAX_DPC_ITERATIONS 128 -#define IRDMA_EVENT_TIMEOUT 50000 +#define IRDMA_EVENT_TIMEOUT_MS 5000 #define IRDMA_VCHNL_EVENT_TIMEOUT 100000 #define IRDMA_RST_TIMEOUT_HZ 4 diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 36e69e6ca9f8..5fa88e6cca4e 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1157,6 +1157,21 @@ exit: return prio; } +static int irdma_wait_for_suspend(struct irdma_qp *iwqp) +{ + if (!wait_event_timeout(iwqp->iwdev->suspend_wq, + !iwqp->suspend_pending, + msecs_to_jiffies(IRDMA_EVENT_TIMEOUT_MS))) { + iwqp->suspend_pending = false; + ibdev_warn(&iwqp->iwdev->ibdev, + "modify_qp timed out waiting for suspend. qp_id = %d, last_ae = 0x%x\n", + iwqp->ibqp.qp_num, iwqp->last_aeq); + return -EBUSY; + } + + return 0; +} + /** * irdma_modify_qp_roce - modify qp request * @ibqp: qp's pointer for modify @@ -1420,6 +1435,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, info.next_iwarp_state = IRDMA_QP_STATE_SQD; issue_modify_qp = 1; + iwqp->suspend_pending = true; break; case IB_QPS_SQE: case IB_QPS_ERR: @@ -1460,6 +1476,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, ctx_info->rem_endpoint_idx = udp_info->arp_idx; if (irdma_hw_modify_qp(iwdev, iwqp, &info, true)) return -EINVAL; + if (info.next_iwarp_state == IRDMA_QP_STATE_SQD) { + ret = irdma_wait_for_suspend(iwqp); + if (ret) + return ret; + } spin_lock_irqsave(&iwqp->lock, flags); if (iwqp->iwarp_state == info.curr_iwarp_state) { iwqp->iwarp_state = info.next_iwarp_state; diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index c42ac22de00e..cfa140b36395 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -198,6 +198,7 @@ struct irdma_qp { u8 flush_issued : 1; u8 sig_all : 1; u8 pau_mode : 1; + u8 suspend_pending : 1; u8 rsvd : 1; u8 iwarp_state; u16 term_sq_flush_code; -- cgit v1.2.3 From 3ee7ecd712048ade6482bea4b2f3dcaf039c0348 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:38 +0100 Subject: RDMA/rtrs-srv: Do not unconditionally enable irq When IO is completed, rtrs can be called in softirq context, unconditionally enabling irq could cause panic. To be on safe side, use spin_lock_irqsave and spin_unlock_irqrestore instread. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Jack Wang Signed-off-by: Florian-Ewald Mueller Signed-off-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-2-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 75e56604e462..ab4200041fd3 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -65,8 +65,9 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, { enum rtrs_srv_state old_state; bool changed = false; + unsigned long flags; - spin_lock_irq(&srv_path->state_lock); + spin_lock_irqsave(&srv_path->state_lock, flags); old_state = srv_path->state; switch (new_state) { case RTRS_SRV_CONNECTED: @@ -87,7 +88,7 @@ static bool rtrs_srv_change_state(struct rtrs_srv_path *srv_path, } if (changed) srv_path->state = new_state; - spin_unlock_irq(&srv_path->state_lock); + spin_unlock_irqrestore(&srv_path->state_lock, flags); return changed; } -- cgit v1.2.3 From 3e44a61b5db873612e20e7b7922468d7d1ac2d22 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:39 +0100 Subject: RDMA/rtrs-clt: Start hb after path_up If we start hb too early, it will confuse server side to close the session. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-3-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 07261523c554..9bf5f7fb7714 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -2350,8 +2350,6 @@ static int init_conns(struct rtrs_clt_path *clt_path) if (err) goto destroy; - rtrs_start_hb(&clt_path->s); - return 0; destroy: @@ -2625,6 +2623,7 @@ static int init_path(struct rtrs_clt_path *clt_path) goto out; } rtrs_clt_path_up(clt_path); + rtrs_start_hb(&clt_path->s); out: mutex_unlock(&clt_path->init_mutex); -- cgit v1.2.3 From ed1e52aefa16f15dc2f04054a3baf11726a7460e Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:40 +0100 Subject: RDMA/rtrs-srv: Check return values while processing info request While processing info request, it could so happen that the srv_path goes to CLOSING state, cause of any of the error events from RDMA. That state change should be picked up while trying to change the state in process_info_req, by checking the return value. In case the state change call in process_info_req fails, we fail the processing. We should also check the return value for rtrs_srv_path_up, since it sends a link event to the client above, and the client can fail for any reason. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-4-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index ab4200041fd3..4be0e5b132d4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -710,20 +710,23 @@ static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(wc->opcode != IB_WC_SEND); } -static void rtrs_srv_path_up(struct rtrs_srv_path *srv_path) +static int rtrs_srv_path_up(struct rtrs_srv_path *srv_path) { struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_srv_ctx *ctx = srv->ctx; - int up; + int up, ret = 0; mutex_lock(&srv->paths_ev_mutex); up = ++srv->paths_up; if (up == 1) - ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); + ret = ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); mutex_unlock(&srv->paths_ev_mutex); /* Mark session as established */ - srv_path->established = true; + if (!ret) + srv_path->established = true; + + return ret; } static void rtrs_srv_path_down(struct rtrs_srv_path *srv_path) @@ -852,7 +855,12 @@ static int process_info_req(struct rtrs_srv_con *con, goto iu_free; kobject_get(&srv_path->kobj); get_device(&srv_path->srv->dev); - rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); + err = rtrs_srv_change_state(srv_path, RTRS_SRV_CONNECTED); + if (!err) { + rtrs_err(s, "rtrs_srv_change_state(), err: %d\n", err); + goto iu_free; + } + rtrs_srv_start_hb(srv_path); /* @@ -861,7 +869,11 @@ static int process_info_req(struct rtrs_srv_con *con, * all connections are successfully established. Thus, simply notify * listener with a proper event if we are the first path. */ - rtrs_srv_path_up(srv_path); + err = rtrs_srv_path_up(srv_path); + if (err) { + rtrs_err(s, "rtrs_srv_path_up(), err: %d\n", err); + goto iu_free; + } ib_dma_sync_single_for_device(srv_path->s.dev->ib_dev, tx_iu->dma_addr, -- cgit v1.2.3 From 3a71cd6ca0ce33d1af019ecf1d7167406fa54400 Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:41 +0100 Subject: RDMA/rtrs-srv: Free srv_mr iu only when always_invalidate is true Since srv_mr->iu is allocated and used only when always_invalidate is true, free it only when always_invalidate is true. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Jack Wang Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-5-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 4be0e5b132d4..925b71481c62 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -551,7 +551,10 @@ static void unmap_cont_bufs(struct rtrs_srv_path *srv_path) struct rtrs_srv_mr *srv_mr; srv_mr = &srv_path->mrs[i]; - rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); + + if (always_invalidate) + rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); + ib_dereg_mr(srv_mr->mr); ib_dma_unmap_sg(srv_path->s.dev->ib_dev, srv_mr->sgt.sgl, srv_mr->sgt.nents, DMA_BIDIRECTIONAL); -- cgit v1.2.3 From c4d32e77fc1006f99eeb78417efc3d81a384072a Mon Sep 17 00:00:00 2001 From: Md Haris Iqbal Date: Mon, 20 Nov 2023 16:41:42 +0100 Subject: RDMA/rtrs-srv: Destroy path files after making sure no IOs in-flight Destroying path files may lead to the freeing of rdma_stats. This creates the following race. An IO is in-flight, or has just passed the session state check in process_read/process_write. The close_work gets triggered and the function rtrs_srv_close_work() starts and does destroy path which frees the rdma_stats. After this the function process_read/process_write resumes and tries to update the stats through the function rtrs_srv_update_rdma_stats This commit solves the problem by moving the destroy path function to a later point. This point makes sure any inflights are completed. This is done by qp drain, and waiting for all in-flights through ops_id. Fixes: 9cb837480424 ("RDMA/rtrs: server: main functionality") Signed-off-by: Md Haris Iqbal Signed-off-by: Santosh Kumar Pradhan Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-6-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-srv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 925b71481c62..1d33efb8fb03 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -1532,7 +1532,6 @@ static void rtrs_srv_close_work(struct work_struct *work) srv_path = container_of(work, typeof(*srv_path), close_work); - rtrs_srv_destroy_path_files(srv_path); rtrs_srv_stop_hb(srv_path); for (i = 0; i < srv_path->s.con_num; i++) { @@ -1552,6 +1551,8 @@ static void rtrs_srv_close_work(struct work_struct *work) /* Wait for all completion */ wait_for_completion(&srv_path->complete_done); + rtrs_srv_destroy_path_files(srv_path); + /* Notify upper layer if we are the last path */ rtrs_srv_path_down(srv_path); -- cgit v1.2.3 From 6d09f6f7d7584e099633282ea915988914f86529 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:43 +0100 Subject: RDMA/rtrs-clt: Fix the max_send_wr setting For each write request, we need Request, Response Memory Registration, Local Invalidate. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-7-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 9bf5f7fb7714..16535030b442 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1699,7 +1699,7 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) clt_path->s.dev_ref++; max_send_wr = min_t(int, wr_limit, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ - clt_path->queue_depth * 3 + 1); + clt_path->queue_depth * 4 + 1); max_recv_wr = min_t(int, wr_limit, clt_path->queue_depth * 3 + 1); max_send_sge = 2; -- cgit v1.2.3 From 0c8bb6eb70ca41031f663b4481aac9ac78b53bc6 Mon Sep 17 00:00:00 2001 From: Jack Wang Date: Mon, 20 Nov 2023 16:41:44 +0100 Subject: RDMA/rtrs-clt: Remove the warnings for req in_use check As we chain the WR during write request: memory registration, rdma write, local invalidate, if only the last WR fail to send due to send queue overrun, the server can send back the reply, while client mark the req->in_use to false in case of error in rtrs_clt_req when error out from rtrs_post_rdma_write_sg. Fixes: 6a98d71daea1 ("RDMA/rtrs: client: main functionality") Signed-off-by: Jack Wang Reviewed-by: Md Haris Iqbal Signed-off-by: Grzegorz Prajsner Link: https://lore.kernel.org/r/20231120154146.920486-8-haris.iqbal@ionos.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/rtrs/rtrs-clt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 16535030b442..7f3167ce2972 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -384,7 +384,7 @@ static void complete_rdma_req(struct rtrs_clt_io_req *req, int errno, struct rtrs_clt_path *clt_path; int err; - if (WARN_ON(!req->in_use)) + if (!req->in_use) return; if (WARN_ON(!req->con)) return; -- cgit v1.2.3 From 422b19f7f006e813ee0865aadce6a62b3c263c42 Mon Sep 17 00:00:00 2001 From: Kalesh AP Date: Tue, 21 Nov 2023 00:29:47 -0800 Subject: RDMA/bnxt_re: Correct module description string The word "Driver" is repeated twice in the "modinfo bnxt_re" output description. Fix it. Fixes: 1ac5a4047975 ("RDMA/bnxt_re: Add bnxt_re RoCE driver") Signed-off-by: Kalesh AP Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1700555387-6277-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index f79369c8360a..a99c68247af0 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -71,7 +71,7 @@ static char version[] = BNXT_RE_DESC "\n"; MODULE_AUTHOR("Eddie Wai "); -MODULE_DESCRIPTION(BNXT_RE_DESC " Driver"); +MODULE_DESCRIPTION(BNXT_RE_DESC); MODULE_LICENSE("Dual BSD/GPL"); /* globals */ -- cgit v1.2.3 From 2b78832f50c4d711e161b166d7d8790968051546 Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Tue, 21 Nov 2023 02:12:36 -0800 Subject: RDMA/irdma: Fix UAF in irdma_sc_ccq_get_cqe_info() When removing the irdma driver or unplugging its aux device, the ccq queue is released before destorying the cqp_cmpl_wq queue. But in the window, there may still be completion events for wqes. That will cause a UAF in irdma_sc_ccq_get_cqe_info(). [34693.333191] BUG: KASAN: use-after-free in irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333194] Read of size 8 at addr ffff889097f80818 by task kworker/u67:1/26327 [34693.333194] [34693.333199] CPU: 9 PID: 26327 Comm: kworker/u67:1 Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1 [34693.333200] Hardware name: SANGFOR Inspur/NULL, BIOS 4.1.13 08/01/2016 [34693.333211] Workqueue: cqp_cmpl_wq cqp_compl_worker [irdma] [34693.333213] Call Trace: [34693.333220] dump_stack+0x71/0xab [34693.333226] print_address_description+0x6b/0x290 [34693.333238] ? irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333240] kasan_report+0x14a/0x2b0 [34693.333251] irdma_sc_ccq_get_cqe_info+0x82f/0x8c0 [irdma] [34693.333264] ? irdma_free_cqp_request+0x151/0x1e0 [irdma] [34693.333274] irdma_cqp_ce_handler+0x1fb/0x3b0 [irdma] [34693.333285] ? irdma_ctrl_init_hw+0x2c20/0x2c20 [irdma] [34693.333290] ? __schedule+0x836/0x1570 [34693.333293] ? strscpy+0x83/0x180 [34693.333296] process_one_work+0x56a/0x11f0 [34693.333298] worker_thread+0x8f/0xf40 [34693.333301] ? __kthread_parkme+0x78/0xf0 [34693.333303] ? rescuer_thread+0xc50/0xc50 [34693.333305] kthread+0x2a0/0x390 [34693.333308] ? kthread_destroy_worker+0x90/0x90 [34693.333310] ret_from_fork+0x1f/0x40 Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Signed-off-by: Shifeng Li Link: https://lore.kernel.org/r/20231121101236.581694-1-lishifeng1992@126.com Acked-by: Shiraz Saleem Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/irdma/hw.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 74f6bc7b7ad1..64a382070e5c 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -585,9 +585,6 @@ static void irdma_destroy_cqp(struct irdma_pci_f *rf) struct irdma_cqp *cqp = &rf->cqp; int status = 0; - if (rf->cqp_cmpl_wq) - destroy_workqueue(rf->cqp_cmpl_wq); - status = irdma_sc_cqp_destroy(dev->cqp); if (status) ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status); @@ -752,6 +749,9 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf) struct irdma_ccq *ccq = &rf->ccq; int status = 0; + if (rf->cqp_cmpl_wq) + destroy_workqueue(rf->cqp_cmpl_wq); + if (!rf->reset) status = irdma_sc_ccq_destroy(dev->ccq, 0, true); if (status) -- cgit v1.2.3 From 4fbc3a52cd4d14de3793f4b2c721d7306ea84cf9 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:41 -0600 Subject: RDMA/core: Fix umem iterator when PAGE_SIZE is greater then HCA pgsz 64k pages introduce the situation in this diagram when the HCA 4k page size is being used: +-------------------------------------------+ <--- 64k aligned VA | | | HCA 4k page | | | +-------------------------------------------+ | o | | | | o | | | | o | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ <--- Live HCA page |OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO| <--- offset | | <--- VA | MR data | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ | o | | | | o | | | | o | +-------------------------------------------+ | | | HCA 4k page | | | +-------------------------------------------+ The VA addresses are coming from rdma-core in this diagram can be arbitrary, but for 64k pages, the VA may be offset by some number of HCA 4k pages and followed by some number of HCA 4k pages. The current iterator doesn't account for either the preceding 4k pages or the following 4k pages. Fix the issue by extending the ib_block_iter to contain the number of DMA pages like comment [1] says and by using __sg_advance to start the iterator at the first live HCA page. The changes are contained in a parallel set of iterator start and next functions that are umem aware and specific to umem since there is one user of the rdma_for_each_block() without umem. These two fixes prevents the extra pages before and after the user MR data. Fix the preceding pages by using the __sq_advance field to start at the first 4k page containing MR data. Fix the following pages by saving the number of pgsz blocks in the iterator state and downcounting on each next. This fix allows for the elimination of the small page crutch noted in the Fixes. Fixes: 10c75ccb54e4 ("RDMA/umem: Prevent small pages from being returned by ib_umem_find_best_pgsz()") Link: https://lore.kernel.org/r/20231129202143.1434-2-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 6 ------ include/rdma/ib_umem.h | 9 ++++++++- include/rdma/ib_verbs.h | 1 + 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index f9ab671c8eda..07c571c7b699 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -96,12 +96,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, return page_size; } - /* rdma_for_each_block() has a bug if the page size is smaller than the - * page size used to build the umem. For now prevent smaller page sizes - * from being returned. - */ - pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT); - /* The best result is the smallest page size that results in the minimum * number of required pages. Compute the largest page size that could * work based on VA address bits that don't change. diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 95896472a82b..565a85044541 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -77,6 +77,13 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, { __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl, umem->sgt_append.sgt.nents, pgsz); + biter->__sg_advance = ib_umem_offset(umem) & ~(pgsz - 1); + biter->__sg_numblocks = ib_umem_num_dma_blocks(umem, pgsz); +} + +static inline bool __rdma_umem_block_iter_next(struct ib_block_iter *biter) +{ + return __rdma_block_iter_next(biter) && biter->__sg_numblocks--; } /** @@ -92,7 +99,7 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, */ #define rdma_umem_for_each_dma_block(umem, biter, pgsz) \ for (__rdma_umem_block_iter_start(biter, umem, pgsz); \ - __rdma_block_iter_next(biter);) + __rdma_umem_block_iter_next(biter);) #ifdef CONFIG_INFINIBAND_USER_MEM diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index fb1a2d6b1969..b7b6b58dd348 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2850,6 +2850,7 @@ struct ib_block_iter { /* internal states */ struct scatterlist *__sg; /* sg holding the current aligned block */ dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + size_t __sg_numblocks; /* ib_umem_num_dma_blocks() */ unsigned int __sg_nents; /* number of SG entries */ unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ unsigned int __pg_bit; /* alignment of current block */ -- cgit v1.2.3 From 0a5ec366de7e94192669ba08de6ed336607fd282 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:42 -0600 Subject: RDMA/irdma: Ensure iWarp QP queue memory is OS paged aligned The SQ is shared for between kernel and used by storing the kernel page pointer and passing that to a kmap_atomic(). This then requires that the alignment is PAGE_SIZE aligned. Fix by adding an iWarp specific alignment check. Fixes: e965ef0e7b2c ("RDMA/irdma: Split QP handler into irdma_reg_user_mr_type_qp") Link: https://lore.kernel.org/r/20231129202143.1434-3-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 5fa88e6cca4e..fb9088392b19 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2946,6 +2946,11 @@ static int irdma_reg_user_mr_type_qp(struct irdma_mem_reg_req req, int err; u8 lvl; + /* iWarp: Catch page not starting on OS page boundary */ + if (!rdma_protocol_roce(&iwdev->ibdev, 1) && + ib_umem_offset(iwmr->region)) + return -EINVAL; + total = req.sq_pages + req.rq_pages + 1; if (total > iwmr->page_cnt) return -EINVAL; -- cgit v1.2.3 From 03769f72d66edab82484449ed594cb6b00ae0223 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 29 Nov 2023 14:21:43 -0600 Subject: RDMA/irdma: Fix support for 64k pages Virtual QP and CQ require a 4K HW page size but the driver passes PAGE_SIZE to ib_umem_find_best_pgsz() instead. Fix this by using the appropriate 4k value in the bitmap passed to ib_umem_find_best_pgsz(). Fixes: 693a5386eff0 ("RDMA/irdma: Split mr alloc and free into new functions") Link: https://lore.kernel.org/r/20231129202143.1434-4-shiraz.saleem@intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index fb9088392b19..b5eb8d421988 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2914,7 +2914,7 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region, iwmr->type = reg_type; pgsz_bitmap = (reg_type == IRDMA_MEMREG_TYPE_MEM) ? - iwdev->rf->sc_dev.hw_attrs.page_size_cap : PAGE_SIZE; + iwdev->rf->sc_dev.hw_attrs.page_size_cap : SZ_4K; iwmr->page_size = ib_umem_find_best_pgsz(region, pgsz_bitmap, virt); if (unlikely(!iwmr->page_size)) { -- cgit v1.2.3 From e3e82fcb79eeb3f1a88a89f676831773caff514a Mon Sep 17 00:00:00 2001 From: Shifeng Li Date: Thu, 30 Nov 2023 00:14:15 -0800 Subject: RDMA/irdma: Avoid free the non-cqp_request scratch When creating ceq_0 during probing irdma, cqp.sc_cqp will be sent as a cqp_request to cqp->sc_cqp.sq_ring. If the request is pending when removing the irdma driver or unplugging its aux device, cqp.sc_cqp will be dereferenced as wrong struct in irdma_free_pending_cqp_request(). PID: 3669 TASK: ffff88aef892c000 CPU: 28 COMMAND: "kworker/28:0" #0 [fffffe0000549e38] crash_nmi_callback at ffffffff810e3a34 #1 [fffffe0000549e40] nmi_handle at ffffffff810788b2 #2 [fffffe0000549ea0] default_do_nmi at ffffffff8107938f #3 [fffffe0000549eb8] do_nmi at ffffffff81079582 #4 [fffffe0000549ef0] end_repeat_nmi at ffffffff82e016b4 [exception RIP: native_queued_spin_lock_slowpath+1291] RIP: ffffffff8127e72b RSP: ffff88aa841ef778 RFLAGS: 00000046 RAX: 0000000000000000 RBX: ffff88b01f849700 RCX: ffffffff8127e47e RDX: 0000000000000000 RSI: 0000000000000004 RDI: ffffffff83857ec0 RBP: ffff88afe3e4efc8 R8: ffffed15fc7c9dfa R9: ffffed15fc7c9dfa R10: 0000000000000001 R11: ffffed15fc7c9df9 R12: 0000000000740000 R13: ffff88b01f849708 R14: 0000000000000003 R15: ffffed1603f092e1 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000 -- -- #5 [ffff88aa841ef778] native_queued_spin_lock_slowpath at ffffffff8127e72b #6 [ffff88aa841ef7b0] _raw_spin_lock_irqsave at ffffffff82c22aa4 #7 [ffff88aa841ef7c8] __wake_up_common_lock at ffffffff81257363 #8 [ffff88aa841ef888] irdma_free_pending_cqp_request at ffffffffa0ba12cc [irdma] #9 [ffff88aa841ef958] irdma_cleanup_pending_cqp_op at ffffffffa0ba1469 [irdma] #10 [ffff88aa841ef9c0] irdma_ctrl_deinit_hw at ffffffffa0b2989f [irdma] #11 [ffff88aa841efa28] irdma_remove at ffffffffa0b252df [irdma] #12 [ffff88aa841efae8] auxiliary_bus_remove at ffffffff8219afdb #13 [ffff88aa841efb00] device_release_driver_internal at ffffffff821882e6 #14 [ffff88aa841efb38] bus_remove_device at ffffffff82184278 #15 [ffff88aa841efb88] device_del at ffffffff82179d23 #16 [ffff88aa841efc48] ice_unplug_aux_dev at ffffffffa0eb1c14 [ice] #17 [ffff88aa841efc68] ice_service_task at ffffffffa0d88201 [ice] #18 [ffff88aa841efde8] process_one_work at ffffffff811c589a #19 [ffff88aa841efe60] worker_thread at ffffffff811c71ff #20 [ffff88aa841eff10] kthread at ffffffff811d87a0 #21 [ffff88aa841eff50] ret_from_fork at ffffffff82e0022f Fixes: 44d9e52977a1 ("RDMA/irdma: Implement device initialization definitions") Link: https://lore.kernel.org/r/20231130081415.891006-1-lishifeng@sangfor.com.cn Suggested-by: "Ismail, Mustafa" Signed-off-by: Shifeng Li Reviewed-by: Shiraz Saleem Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 64a382070e5c..bd4b2b896444 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1184,7 +1184,6 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, int status; struct irdma_ceq_init_info info = {}; struct irdma_sc_dev *dev = &rf->sc_dev; - u64 scratch; u32 ceq_size; info.ceq_id = ceq_id; @@ -1205,14 +1204,13 @@ static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, iwceq->sc_ceq.ceq_id = ceq_id; info.dev = dev; info.vsi = vsi; - scratch = (uintptr_t)&rf->cqp.sc_cqp; status = irdma_sc_ceq_init(&iwceq->sc_ceq, &info); if (!status) { if (dev->ceq_valid) status = irdma_cqp_ceq_cmd(&rf->sc_dev, &iwceq->sc_ceq, IRDMA_OP_CEQ_CREATE); else - status = irdma_sc_cceq_create(&iwceq->sc_ceq, scratch); + status = irdma_sc_cceq_create(&iwceq->sc_ceq, 0); } if (status) { -- cgit v1.2.3