From e193b7955dfad68035b983a0011f4ef3590c85eb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 23 Aug 2023 13:57:27 -0700 Subject: RDMA/srp: Do not call scsi_done() from srp_abort() After scmd_eh_abort_handler() has called the SCSI LLD eh_abort_handler callback, it performs one of the following actions: * Call scsi_queue_insert(). * Call scsi_finish_command(). * Call scsi_eh_scmd_add(). Hence, SCSI abort handlers must not call scsi_done(). Otherwise all the above actions would trigger a use-after-free. Hence remove the scsi_done() call from srp_abort(). Keep the srp_free_req() call before returning SUCCESS because we may not see the command again if SUCCESS is returned. Cc: Bob Pearson Cc: Shinichiro Kawasaki Fixes: d8536670916a ("IB/srp: Avoid having aborted requests hang") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20230823205727.505681-1-bvanassche@acm.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/ulp/srp/ib_srp.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1574218764e0..2916e77f589b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2784,7 +2784,6 @@ static int srp_abort(struct scsi_cmnd *scmnd) u32 tag; u16 ch_idx; struct srp_rdma_ch *ch; - int ret; shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); @@ -2798,19 +2797,14 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "Sending SRP abort for tag %#x\n", tag); if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun, - SRP_TSK_ABORT_TASK, NULL) == 0) - ret = SUCCESS; - else if (target->rport->state == SRP_RPORT_LOST) - ret = FAST_IO_FAIL; - else - ret = FAILED; - if (ret == SUCCESS) { + SRP_TSK_ABORT_TASK, NULL) == 0) { srp_free_req(ch, req, scmnd, 0); - scmnd->result = DID_ABORT << 16; - scsi_done(scmnd); + return SUCCESS; } + if (target->rport->state == SRP_RPORT_LOST) + return FAST_IO_FAIL; - return ret; + return FAILED; } static int srp_reset_device(struct scsi_cmnd *scmnd) -- cgit v1.2.3 From 53a3f777049771496f791504e7dc8ef017cba590 Mon Sep 17 00:00:00 2001 From: Bernard Metzler Date: Tue, 5 Sep 2023 16:58:22 +0200 Subject: RDMA/siw: Fix connection failure handling In case immediate MPA request processing fails, the newly created endpoint unlinks the listening endpoint and is ready to be dropped. This special case was not handled correctly by the code handling the later TCP socket close, causing a NULL dereference crash in siw_cm_work_handler() when dereferencing a NULL listener. We now also cancel the useless MPA timeout, if immediate MPA request processing fails. This patch furthermore simplifies MPA processing in general: Scheduling a useless TCP socket read in sk_data_ready() upcall is now surpressed, if the socket is already moved out of TCP_ESTABLISHED state. Fixes: 6c52fdc244b5 ("rdma/siw: connection management") Signed-off-by: Bernard Metzler Link: https://lore.kernel.org/r/20230905145822.446263-1-bmt@zurich.ibm.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/siw/siw_cm.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index a2605178f4ed..43e776073f49 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -976,6 +976,7 @@ static void siw_accept_newconn(struct siw_cep *cep) siw_cep_put(cep); new_cep->listen_cep = NULL; if (rv) { + siw_cancel_mpatimer(new_cep); siw_cep_set_free(new_cep); goto error; } @@ -1100,9 +1101,12 @@ static void siw_cm_work_handler(struct work_struct *w) /* * Socket close before MPA request received. */ - siw_dbg_cep(cep, "no mpareq: drop listener\n"); - siw_cep_put(cep->listen_cep); - cep->listen_cep = NULL; + if (cep->listen_cep) { + siw_dbg_cep(cep, + "no mpareq: drop listener\n"); + siw_cep_put(cep->listen_cep); + cep->listen_cep = NULL; + } } } release_cep = 1; @@ -1227,7 +1231,11 @@ static void siw_cm_llp_data_ready(struct sock *sk) if (!cep) goto out; - siw_dbg_cep(cep, "state: %d\n", cep->state); + siw_dbg_cep(cep, "cep state: %d, socket state %d\n", + cep->state, sk->sk_state); + + if (sk->sk_state != TCP_ESTABLISHED) + goto out; switch (cep->state) { case SIW_EPSTATE_RDMA_MODE: -- cgit v1.2.3 From 8fb8a82086f5bda6893ea6557c5a458e4549c6d7 Mon Sep 17 00:00:00 2001 From: Artem Chernyshev Date: Tue, 5 Sep 2023 15:40:48 +0300 Subject: RDMA/cxgb4: Check skb value for failure to allocate get_skb() can fail to allocate skb, so check it. Found by Linux Verification Center (linuxtesting.org) with SVACE. Fixes: 5be78ee924ae ("RDMA/cxgb4: Fix LE hash collision bug for active open connection") Signed-off-by: Artem Chernyshev Link: https://lore.kernel.org/r/20230905124048.284165-1-artem.chernyshev@red-soft.ru Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/cxgb4/cm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index ced615b5ea09..040ba2224f9f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1965,6 +1965,9 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) int win; skb = get_skb(NULL, sizeof(*req), GFP_KERNEL); + if (!skb) + return -ENOMEM; + req = __skb_put_zero(skb, sizeof(*req)); req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR)); req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16))); -- cgit v1.2.3 From c489800e0d48097fc6afebd862c6afa039110a36 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Tue, 5 Sep 2023 18:32:58 +0800 Subject: RDMA/uverbs: Fix typo of sizeof argument Since size of 'hdr' pointer and '*hdr' structure is equal on 64-bit machines issue probably didn't cause any wrong behavior. But anyway, fixing of typo is required. Fixes: da0f60df7bd5 ("RDMA/uverbs: Prohibit write() calls with too small buffers") Co-developed-by: Ivanov Mikhail Signed-off-by: Ivanov Mikhail Signed-off-by: Konstantin Meskhidze Link: https://lore.kernel.org/r/20230905103258.1738246-1-konstantin.meskhidze@huawei.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index bf800f8cb3e4..495d5a5d0373 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -546,7 +546,7 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr, if (hdr->in_words * 4 != count) return -EINVAL; - if (count < method_elm->req_size + sizeof(hdr)) { + if (count < method_elm->req_size + sizeof(*hdr)) { /* * rdma-core v18 and v19 have a bug where they send DESTROY_CQ * with a 16 byte write instead of 24. Old kernels didn't -- cgit v1.2.3 From 6b5f0749ce48c13d7f53b27c39d00bba46e1fd1c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 6 Sep 2023 14:23:52 +0300 Subject: RDMA/erdma: Fix error code in erdma_create_scatter_mtt() The erdma_create_scatter_mtt() function is supposed to return error pointers. Returning NULL will lead to an Oops. Fixes: ed10435d3583 ("RDMA/erdma: Implement hierarchical MTT") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/1eb400d5-d8a3-4a8e-b3da-c43c6c377f86@moroto.mountain Acked-by: Cheng Xu Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index dcccb6015232..70eaed59a67c 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -655,7 +655,7 @@ static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); if (!mtt) - return NULL; + return ERR_PTR(-ENOMEM); mtt->size = ALIGN(size, PAGE_SIZE); mtt->buf = vzalloc(mtt->size); -- cgit v1.2.3 From b2abdffb505f7e1bef1a769ba7cbdc819a6fe623 Mon Sep 17 00:00:00 2001 From: Cheng Xu Date: Fri, 8 Sep 2023 14:05:59 +0800 Subject: RDMA/erdma: Fix NULL pointer access in regmr_cmd Fix the crash of regmr_cmd called by erdma_ib_alloc_mr. The reason is that mr->mem.mtt is not initialized but it is accessed in regmr_cmd. The call trace information: BUG: kernel NULL pointer dereference, address: 0000000000000000 <...> RIP: 0010:regmr_cmd+0x170/0x1c0 [erdma] <...> Call Trace: ? __die+0x20/0x70 ? page_fault_oops+0x66/0x150 ? do_user_addr_fault+0x61/0x660 ? exc_page_fault+0x65/0x140 ? asm_exc_page_fault+0x22/0x30 ? regmr_cmd+0x170/0x1c0 [erdma] ? preempt_count_add+0x70/0xa0 ? _raw_spin_lock_irqsave+0x19/0x50 ? _raw_spin_unlock_irqrestore+0x1b/0x40 ? erdma_alloc_idx+0x51/0x90 [erdma] erdma_get_dma_mr+0xa3/0x120 [erdma] __ib_alloc_pd+0xeb/0x1c0 [ib_core] Fixes: 7244b4aa4221 ("RDMA/erdma: Refactor the storage structure of MTT entries") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/3d140c1d-524a-4dbe-a51c-aee4f7ecafdb@moroto.mountain/ Signed-off-by: Cheng Xu Link: https://lore.kernel.org/r/20230908060559.80203-1-chengyou@linux.alibaba.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/erdma/erdma_verbs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 70eaed59a67c..c317947563fb 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -133,8 +133,8 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) { struct erdma_pd *pd = to_epd(mr->ibmr.pd); + u32 mtt_level = ERDMA_MR_MTT_0LEVEL; struct erdma_cmdq_reg_mr_req req; - u32 mtt_level; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR); @@ -147,10 +147,9 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist); mtt_level = mr->mem.mtt->level; } - } else { + } else if (mr->type != ERDMA_MR_TYPE_DMA) { memcpy(req.phy_addr, mr->mem.mtt->buf, MTT_SIZE(mr->mem.page_cnt)); - mtt_level = ERDMA_MR_MTT_0LEVEL; } req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) | -- cgit v1.2.3 From 18126c767658ae8a831257c6cb7776c5ba5e7249 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 11 Sep 2023 15:18:06 +0300 Subject: RDMA/cma: Fix truncation compilation warning in make_cma_ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following compilation error is false alarm as RDMA devices don't have such large amount of ports to actually cause to format truncation. drivers/infiniband/core/cma_configfs.c: In function ‘make_cma_ports’: drivers/infiniband/core/cma_configfs.c:223:57: error: ‘snprintf’ output may be truncated before the last format character [-Werror=format-truncation=] 223 | snprintf(port_str, sizeof(port_str), "%u", i + 1); | ^ drivers/infiniband/core/cma_configfs.c:223:17: note: ‘snprintf’ output between 2 and 11 bytes into a destination of size 10 223 | snprintf(port_str, sizeof(port_str), "%u", i + 1); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors make[5]: *** [scripts/Makefile.build:243: drivers/infiniband/core/cma_configfs.o] Error 1 Fixes: 045959db65c6 ("IB/cma: Add configfs for rdma_cm") Link: https://lore.kernel.org/r/a7e3b347ee134167fa6a3787c56ef231a04bc8c2.1694434639.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma_configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 7b68b3ea979f..f2fb2d8a6597 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -217,7 +217,7 @@ static int make_cma_ports(struct cma_dev_group *cma_dev_group, return -ENOMEM; for (i = 0; i < ports_num; i++) { - char port_str[10]; + char port_str[11]; ports[i].port_num = i + 1; snprintf(port_str, sizeof(port_str), "%u", i + 1); -- cgit v1.2.3 From 9fc5f9a92fe6897dbed7b9295b234cb7e3cc9d11 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 20 Sep 2023 01:41:19 -0700 Subject: RDMA/bnxt_re: Fix the handling of control path response data Flag that indicate control path command completion should be cleared only after copying the command response data. As soon as the is_in_used flag is clear, the waiting thread can proceed with wrong response data. This wrong data is causing multiple issues like wrong lkey used in data traffic and wrong AH Id etc. Use a memory barrier to ensure that the response data is copied and visible to the process waiting on a different cpu core before clearing the is_in_used flag. Clear the is_in_used after copying the command response. Fixes: bcfee4ce3e01 ("RDMA/bnxt_re: remove redundant cmdq_bitmap") Signed-off-by: Saravanan Vajravel Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1695199280-13520-2-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index c8c4017fe405..e47b4ca64d33 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -665,7 +665,6 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, blocked = cookie & RCFW_CMD_IS_BLOCKING; cookie &= RCFW_MAX_COOKIE_VALUE; crsqe = &rcfw->crsqe_tbl[cookie]; - crsqe->is_in_used = false; if (WARN_ONCE(test_bit(FIRMWARE_STALL_DETECTED, &rcfw->cmdq.flags), @@ -681,8 +680,14 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, atomic_dec(&rcfw->timeout_send); if (crsqe->is_waiter_alive) { - if (crsqe->resp) + if (crsqe->resp) { memcpy(crsqe->resp, qp_event, sizeof(*qp_event)); + /* Insert write memory barrier to ensure that + * response data is copied before clearing the + * flags + */ + smp_wmb(); + } if (!blocked) wait_cmds++; } @@ -694,6 +699,8 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, if (!is_waiter_alive) crsqe->resp = NULL; + crsqe->is_in_used = false; + hwq->cons += req_size; /* This is a case to handle below scenario - -- cgit v1.2.3 From a83c69278975227b689b4a016d1fc8e4820756e9 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Wed, 20 Sep 2023 01:41:20 -0700 Subject: RDMA/bnxt_re: Decrement resource stats correctly rc_qp_count and ud_qp_count is not decremented during qp destroy. Fix this. Fixes: cb95709e0dca ("bnxt_re: Update the hw counters for resource stats") Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1695199280-13520-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0848c2c2ffcf..faa88d12ee86 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -910,6 +910,10 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) list_del(&qp->list); mutex_unlock(&rdev->qp_lock); atomic_dec(&rdev->stats.res.qp_count); + if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_RC) + atomic_dec(&rdev->stats.res.rc_qp_count); + else if (qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD) + atomic_dec(&rdev->stats.res.ud_qp_count); ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); -- cgit v1.2.3 From d7f393430a17c2bfcdf805462a5aa80be4285b27 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 23 Sep 2023 07:55:56 +0200 Subject: IB/mlx4: Fix the size of a buffer in add_port_entries() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to be sure that 'buff' is never truncated, its size should be 12, not 11. When building with W=1, this fixes the following warnings: drivers/infiniband/hw/mlx4/sysfs.c: In function ‘add_port_entries’: drivers/infiniband/hw/mlx4/sysfs.c:268:34: error: ‘sprintf’ may write a terminating nul past the end of the destination [-Werror=format-overflow=] 268 | sprintf(buff, "%d", i); | ^ drivers/infiniband/hw/mlx4/sysfs.c:268:17: note: ‘sprintf’ output between 2 and 12 bytes into a destination of size 11 268 | sprintf(buff, "%d", i); | ^~~~~~~~~~~~~~~~~~~~~~ drivers/infiniband/hw/mlx4/sysfs.c:286:34: error: ‘sprintf’ may write a terminating nul past the end of the destination [-Werror=format-overflow=] 286 | sprintf(buff, "%d", i); | ^ drivers/infiniband/hw/mlx4/sysfs.c:286:17: note: ‘sprintf’ output between 2 and 12 bytes into a destination of size 11 286 | sprintf(buff, "%d", i); | ^~~~~~~~~~~~~~~~~~~~~~ Fixes: c1e7e466120b ("IB/mlx4: Add iov directory in sysfs under the ib device") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/0bb1443eb47308bc9be30232cc23004c4d4cf43e.1695448530.git.christophe.jaillet@wanadoo.fr Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx4/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 24ee79aa2122..88f534cf690e 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -223,7 +223,7 @@ void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num, static int add_port_entries(struct mlx4_ib_dev *device, int port_num) { int i; - char buff[11]; + char buff[12]; struct mlx4_ib_iov_port *port = NULL; int ret = 0 ; struct ib_port_attr attr; -- cgit v1.2.3 From 4f14c6c0213e1def48f0f887d35f44095416c67d Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Wed, 20 Sep 2023 13:01:54 +0300 Subject: RDMA/mlx5: Fix assigning access flags to cache mkeys After the change to use dynamic cache structure, new cache entries can be added and the mkey allocation can no longer assume that all mkeys created for the cache have access_flags equal to zero. Example of a flow that exposes the issue: A user registers MR with RO on a HCA that cannot UMR RO and the mkey is created outside of the cache. When the user deregisters the MR, a new cache entry is created to store mkeys with RO. Later, the user registers 2 MRs with RO. The first MR is reused from the new cache entry. When we try to get the second mkey from the cache we see the entry is empty so we go to the MR cache mkey allocation flow which would have allocated a mkey with no access flags, resulting the user getting a MR without RO. Fixes: dd1b913fb0d0 ("RDMA/mlx5: Cache all user cacheable mkeys on dereg MR flow") Reviewed-by: Edward Srouji Signed-off-by: Michael Guralnik Link: https://lore.kernel.org/r/8a802700b82def3ace3f77cd7a9ad9d734af87e7.1695203958.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3e345ef380f1..10da9e81fa62 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -301,7 +301,8 @@ static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) static void set_cache_mkc(struct mlx5_cache_ent *ent, void *mkc) { - set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); + set_mkc_access_pd_addr_fields(mkc, ent->rb_key.access_flags, 0, + ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, access_mode_1_0, ent->rb_key.access_mode & 0x3); -- cgit v1.2.3 From 2fad8f06a582cd431d398a0b3f9be21d069603ab Mon Sep 17 00:00:00 2001 From: Hamdan Igbaria Date: Wed, 20 Sep 2023 13:01:55 +0300 Subject: RDMA/mlx5: Fix mutex unlocking on error flow for steering anchor creation The mutex was not unlocked on some of the error flows. Moved the unlock location to include all the error flow scenarios. Fixes: e1f4a52ac171 ("RDMA/mlx5: Create an indirect flow table for steering anchor") Reviewed-by: Mark Bloch Signed-off-by: Hamdan Igbaria Link: https://lore.kernel.org/r/1244a69d783da997c0af0b827c622eb00495492e.1695203958.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 1e419e080b53..520034acf73a 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2470,8 +2470,8 @@ destroy_res: mlx5_steering_anchor_destroy_res(ft_prio); put_flow_table: put_flow_table(dev, ft_prio, true); - mutex_unlock(&dev->flow_db->lock); free_obj: + mutex_unlock(&dev->flow_db->lock); kfree(obj); return err; -- cgit v1.2.3 From dab994bcc609a172bfdab15a0d4cb7e50e8b5458 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 20 Sep 2023 13:01:56 +0300 Subject: RDMA/mlx5: Fix NULL string error checkpath is complaining about NULL string, change it to 'Unknown'. Fixes: 37aa5c36aa70 ("IB/mlx5: Add UARs write-combining and non-cached mapping") Signed-off-by: Shay Drory Link: https://lore.kernel.org/r/8638e5c14fadbde5fa9961874feae917073af920.1695203958.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aed5cdea50e6..555629b798b9 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2084,7 +2084,7 @@ static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) case MLX5_IB_MMAP_DEVICE_MEM: return "Device Memory"; default: - return NULL; + return "Unknown"; } } -- cgit v1.2.3 From 374012b0045780b7ad498be62e85153009bb7fe9 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 12 Sep 2023 13:07:45 +0300 Subject: RDMA/mlx5: Fix mkey cache possible deadlock on cleanup Fix the deadlock by refactoring the MR cache cleanup flow to flush the workqueue without holding the rb_lock. This adds a race between cache cleanup and creation of new entries which we solve by denied creation of new entries after cache cleanup started. Lockdep: WARNING: possible circular locking dependency detected [ 2785.326074 ] 6.2.0-rc6_for_upstream_debug_2023_01_31_14_02 #1 Not tainted [ 2785.339778 ] ------------------------------------------------------ [ 2785.340848 ] devlink/53872 is trying to acquire lock: [ 2785.341701 ] ffff888124f8c0c8 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}, at: __flush_work+0xc8/0x900 [ 2785.343403 ] [ 2785.343403 ] but task is already holding lock: [ 2785.344464 ] ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib] [ 2785.346273 ] [ 2785.346273 ] which lock already depends on the new lock. [ 2785.346273 ] [ 2785.347720 ] [ 2785.347720 ] the existing dependency chain (in reverse order) is: [ 2785.349003 ] [ 2785.349003 ] -> #1 (&dev->cache.rb_lock){+.+.}-{3:3}: [ 2785.350160 ] __mutex_lock+0x14c/0x15c0 [ 2785.350962 ] delayed_cache_work_func+0x2d1/0x610 [mlx5_ib] [ 2785.352044 ] process_one_work+0x7c2/0x1310 [ 2785.352879 ] worker_thread+0x59d/0xec0 [ 2785.353636 ] kthread+0x28f/0x330 [ 2785.354370 ] ret_from_fork+0x1f/0x30 [ 2785.355135 ] [ 2785.355135 ] -> #0 ((work_completion)(&(&ent->dwork)->work)){+.+.}-{0:0}: [ 2785.356515 ] __lock_acquire+0x2d8a/0x5fe0 [ 2785.357349 ] lock_acquire+0x1c1/0x540 [ 2785.358121 ] __flush_work+0xe8/0x900 [ 2785.358852 ] __cancel_work_timer+0x2c7/0x3f0 [ 2785.359711 ] mlx5_mkey_cache_cleanup+0xfb/0x250 [mlx5_ib] [ 2785.360781 ] mlx5_ib_stage_pre_ib_reg_umr_cleanup+0x16/0x30 [mlx5_ib] [ 2785.361969 ] __mlx5_ib_remove+0x68/0x120 [mlx5_ib] [ 2785.362960 ] mlx5r_remove+0x63/0x80 [mlx5_ib] [ 2785.363870 ] auxiliary_bus_remove+0x52/0x70 [ 2785.364715 ] device_release_driver_internal+0x3c1/0x600 [ 2785.365695 ] bus_remove_device+0x2a5/0x560 [ 2785.366525 ] device_del+0x492/0xb80 [ 2785.367276 ] mlx5_detach_device+0x1a9/0x360 [mlx5_core] [ 2785.368615 ] mlx5_unload_one_devl_locked+0x5a/0x110 [mlx5_core] [ 2785.369934 ] mlx5_devlink_reload_down+0x292/0x580 [mlx5_core] [ 2785.371292 ] devlink_reload+0x439/0x590 [ 2785.372075 ] devlink_nl_cmd_reload+0xaef/0xff0 [ 2785.372973 ] genl_family_rcv_msg_doit.isra.0+0x1bd/0x290 [ 2785.374011 ] genl_rcv_msg+0x3ca/0x6c0 [ 2785.374798 ] netlink_rcv_skb+0x12c/0x360 [ 2785.375612 ] genl_rcv+0x24/0x40 [ 2785.376295 ] netlink_unicast+0x438/0x710 [ 2785.377121 ] netlink_sendmsg+0x7a1/0xca0 [ 2785.377926 ] sock_sendmsg+0xc5/0x190 [ 2785.378668 ] __sys_sendto+0x1bc/0x290 [ 2785.379440 ] __x64_sys_sendto+0xdc/0x1b0 [ 2785.380255 ] do_syscall_64+0x3d/0x90 [ 2785.381031 ] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [ 2785.381967 ] [ 2785.381967 ] other info that might help us debug this: [ 2785.381967 ] [ 2785.383448 ] Possible unsafe locking scenario: [ 2785.383448 ] [ 2785.384544 ] CPU0 CPU1 [ 2785.385383 ] ---- ---- [ 2785.386193 ] lock(&dev->cache.rb_lock); [ 2785.386940 ] lock((work_completion)(&(&ent->dwork)->work)); [ 2785.388327 ] lock(&dev->cache.rb_lock); [ 2785.389425 ] lock((work_completion)(&(&ent->dwork)->work)); [ 2785.390414 ] [ 2785.390414 ] *** DEADLOCK *** [ 2785.390414 ] [ 2785.391579 ] 6 locks held by devlink/53872: [ 2785.392341 ] #0: ffffffff84c17a50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 [ 2785.393630 ] #1: ffff888142280218 (&devlink->lock_key){+.+.}-{3:3}, at: devlink_get_from_attrs_lock+0x12d/0x2d0 [ 2785.395324 ] #2: ffff8881422d3c38 (&dev->lock_key){+.+.}-{3:3}, at: mlx5_unload_one_devl_locked+0x4a/0x110 [mlx5_core] [ 2785.397322 ] #3: ffffffffa0e59068 (mlx5_intf_mutex){+.+.}-{3:3}, at: mlx5_detach_device+0x60/0x360 [mlx5_core] [ 2785.399231 ] #4: ffff88810e3cb0e8 (&dev->mutex){....}-{3:3}, at: device_release_driver_internal+0x8d/0x600 [ 2785.400864 ] #5: ffff88817e8f1260 (&dev->cache.rb_lock){+.+.}-{3:3}, at: mlx5_mkey_cache_cleanup+0x77/0x250 [mlx5_ib] Fixes: b95845178328 ("RDMA/mlx5: Change the cache structure to an RB-tree") Signed-off-by: Shay Drory Signed-off-by: Michael Guralnik Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/mr.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 16713baf0d06..d1ff98aad162 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -798,6 +798,7 @@ struct mlx5_mkey_cache { struct dentry *fs_root; unsigned long last_add; struct delayed_work remove_ent_dwork; + u8 disable: 1; }; struct mlx5_ib_port_resources { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 10da9e81fa62..433f96459246 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1025,19 +1025,27 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) if (!dev->cache.wq) return; - cancel_delayed_work_sync(&dev->cache.remove_ent_dwork); mutex_lock(&dev->cache.rb_lock); + dev->cache.disable = true; for (node = rb_first(root); node; node = rb_next(node)) { ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); ent->disabled = true; xa_unlock_irq(&ent->mkeys); - cancel_delayed_work_sync(&ent->dwork); } + mutex_unlock(&dev->cache.rb_lock); + + /* + * After all entries are disabled and will not reschedule on WQ, + * flush it and all async commands. + */ + flush_workqueue(dev->cache.wq); mlx5_mkey_cache_debugfs_cleanup(dev); mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); + /* At this point all entries are disabled and have no concurrent work. */ + mutex_lock(&dev->cache.rb_lock); node = rb_first(root); while (node) { ent = rb_entry(node, struct mlx5_cache_ent, node); @@ -1822,6 +1830,10 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, } mutex_lock(&cache->rb_lock); + if (cache->disable) { + mutex_unlock(&cache->rb_lock); + return 0; + } ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); if (ent) { if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { -- cgit v1.2.3 From e0fe97efdb00f0f32b038a4836406a82886aec9c Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 27 Sep 2023 12:05:11 +0300 Subject: RDMA/cma: Initialize ib_sa_multicast structure to 0 when join Initialize the structure to 0 so that it's fields won't have random values. For example fields like rec.traffic_class (as well as rec.flow_label and rec.sl) is used to generate the user AH through: cma_iboe_join_multicast cma_make_mc_event ib_init_ah_from_mcmember And a random traffic_class causes a random IP DSCP in RoCEv2. Fixes: b5de0c60cc30 ("RDMA/cma: Fix use after free race in roce multicast join") Signed-off-by: Mark Zhang Link: https://lore.kernel.org/r/20230927090511.603595-1-markzhang@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c343edf2f664..1e2cd7c8716e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4968,7 +4968,7 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; - struct ib_sa_multicast ib; + struct ib_sa_multicast ib = {}; enum ib_gid_type gid_type; bool send_only; -- cgit v1.2.3 From c99a7457e5bb873914a74307ba2df85f6799203b Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 28 Sep 2023 20:20:47 +0300 Subject: RDMA/mlx5: Remove not-used cache disable flag During execution of mlx5_mkey_cache_cleanup(), there is a guarantee that MR are not registered and/or destroyed. It means that we don't need newly introduced cache disable flag. Fixes: 374012b00457 ("RDMA/mlx5: Fix mkey cache possible deadlock on cleanup") Link: https://lore.kernel.org/r/c7e9c9f98c8ae4a7413d97d9349b29f5b0a23dbe.1695921626.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 - drivers/infiniband/hw/mlx5/mr.c | 5 ----- 2 files changed, 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d1ff98aad162..16713baf0d06 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -798,7 +798,6 @@ struct mlx5_mkey_cache { struct dentry *fs_root; unsigned long last_add; struct delayed_work remove_ent_dwork; - u8 disable: 1; }; struct mlx5_ib_port_resources { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 433f96459246..8a3762d9ff58 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1026,7 +1026,6 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) return; mutex_lock(&dev->cache.rb_lock); - dev->cache.disable = true; for (node = rb_first(root); node; node = rb_next(node)) { ent = rb_entry(node, struct mlx5_cache_ent, node); xa_lock_irq(&ent->mkeys); @@ -1830,10 +1829,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, } mutex_lock(&cache->rb_lock); - if (cache->disable) { - mutex_unlock(&cache->rb_lock); - return 0; - } ent = mkey_cache_ent_from_rb_key(dev, mr->mmkey.rb_key); if (ent) { if (ent->rb_key.ndescs == mr->mmkey.rb_key.ndescs) { -- cgit v1.2.3 From c38d23a54445f9a8aa6831fafc9af0496ba02f9e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Wed, 4 Oct 2023 21:17:49 +0300 Subject: RDMA/core: Require admin capabilities to set system parameters Like any other set command, require admin permissions to do it. Cc: stable@vger.kernel.org Fixes: 2b34c5580226 ("RDMA/core: Add command to set ib_core device net namspace sharing mode") Link: https://lore.kernel.org/r/75d329fdd7381b52cbdf87910bef16c9965abb1f.1696443438.git.leon@kernel.org Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/nldev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index d5d3e4f0de77..6d1dbc978759 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -2529,6 +2529,7 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { }, [RDMA_NLDEV_CMD_SYS_SET] = { .doit = nldev_set_sys_set_doit, + .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_STAT_SET] = { .doit = nldev_stat_set_doit, -- cgit v1.2.3