From 9191df0029266cd32ed8f47def22081e18f2d9b8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 27 Jun 2023 16:43:29 +0200 Subject: RDMA/siw: use vmalloc_array and vcalloc Use vmalloc_array and vcalloc to protect against multiplication overflows. The changes were done using the following Coccinelle semantic patch: // @initialize:ocaml@ @@ let rename alloc = match alloc with "vmalloc" -> "vmalloc_array" | "vzalloc" -> "vcalloc" | _ -> failwith "unknown" @@ size_t e1,e2; constant C1, C2; expression E1, E2, COUNT, x1, x2, x3; typedef u8; typedef __u8; type t = {u8,__u8,char,unsigned char}; identifier alloc = {vmalloc,vzalloc}; fresh identifier realloc = script:ocaml(alloc) { rename alloc }; @@ ( alloc(x1*x2*x3) | alloc(C1 * C2) | alloc((sizeof(t)) * (COUNT), ...) | - alloc((e1) * (e2)) + realloc(e1, e2) | - alloc((e1) * (COUNT)) + realloc(COUNT, e1) | - alloc((E1) * (E2)) + realloc(E1, E2) ) // Link: https://lore.kernel.org/r/20230627144339.144478-15-Julia.Lawall@inria.fr Signed-off-by: Julia Lawall Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_qp.c | 4 ++-- drivers/infiniband/sw/siw/siw_verbs.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 81e9bbd9ebda..47d0197db9a1 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -204,7 +204,7 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { if (irq_size) { irq_size = roundup_pow_of_two(irq_size); - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + qp->irq = vcalloc(irq_size, sizeof(struct siw_sqe)); if (!qp->irq) { qp->attrs.irq_size = 0; return -ENOMEM; @@ -212,7 +212,7 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) } if (orq_size) { orq_size = roundup_pow_of_two(orq_size); - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + qp->orq = vcalloc(orq_size, sizeof(struct siw_sqe)); if (!qp->orq) { qp->attrs.orq_size = 0; qp->attrs.irq_size = 0; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 398ec13db624..296d839ee876 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -381,7 +381,7 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); else - qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); + qp->sendq = vcalloc(num_sqe, sizeof(struct siw_sqe)); if (qp->sendq == NULL) { rv = -ENOMEM; @@ -414,7 +414,7 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, qp->recvq = vmalloc_user(num_rqe * sizeof(struct siw_rqe)); else - qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); + qp->recvq = vcalloc(num_rqe, sizeof(struct siw_rqe)); if (qp->recvq == NULL) { rv = -ENOMEM; @@ -1624,7 +1624,7 @@ int siw_create_srq(struct ib_srq *base_srq, srq->recvq = vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe)); else - srq->recvq = vzalloc(srq->num_rqe * sizeof(struct siw_rqe)); + srq->recvq = vcalloc(srq->num_rqe, sizeof(struct siw_rqe)); if (srq->recvq == NULL) { rv = -ENOMEM; -- cgit v1.2.3 From bad5b6e34ffbaacc77ad28a0f482e33b3929e635 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 17 Jul 2023 11:12:12 -0400 Subject: RDMA/siw: Fabricate a GID on tun and loopback devices LOOPBACK and NONE (tunnel) devices have all-zero MAC addresses. 
Currently, siw_device_create() falls back to copying the IB device's name in those cases, because an all-zero MAC address breaks the RDMA core address resolution mechanism. However, at the point when siw_device_create() constructs a GID, the ib_device::name field is uninitialized, leaving the MAC address to remain in an all-zero state. Fabricate a random artificial GID for such devices, and ensure this artificial GID is returned for all device query operations. Link: https://lore.kernel.org/r/168960673260.3007.12378736853793339110.stgit@manet.1015granger.net Reported-by: Tom Talpey Fixes: a2d36b02c15d ("RDMA/siw: Enable siw on tunnel devices") Reviewed-by: Bernard Metzler Reviewed-by: Tom Talpey Signed-off-by: Chuck Lever Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw.h | 1 + drivers/infiniband/sw/siw/siw_main.c | 22 ++++++++-------------- drivers/infiniband/sw/siw/siw_verbs.c | 4 ++-- 3 files changed, 11 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 2f3a9cda3850..8b4a710b82bc 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -74,6 +74,7 @@ struct siw_device { u32 vendor_part_id; int numa_node; + char raw_gid[ETH_ALEN]; /* physical port state (only one port per device) */ enum ib_port_state state; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 65b5cda5457b..f45600d169ae 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -75,8 +75,7 @@ static int siw_device_register(struct siw_device *sdev, const char *name) return rv; } - siw_dbg(base_dev, "HWaddr=%pM\n", sdev->netdev->dev_addr); - + siw_dbg(base_dev, "HWaddr=%pM\n", sdev->raw_gid); return 0; } @@ -313,24 +312,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev) return NULL; base_dev = &sdev->base_dev; - sdev->netdev = netdev; - if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) { - addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, - netdev->dev_addr); + if (netdev->addr_len) { + memcpy(sdev->raw_gid, netdev->dev_addr, + min_t(unsigned int, netdev->addr_len, ETH_ALEN)); } else { /* - * This device does not have a HW address, - * but connection mangagement lib expects gid != 0 + * This device does not have a HW address, but + * connection mangagement requires a unique gid. 
 */
-		size_t len = min_t(size_t, strlen(base_dev->name), 6);
-		char addr[6] = { };
-
-		memcpy(addr, base_dev->name, len);
-		addrconf_addr_eui48((unsigned char *)&base_dev->node_guid,
-				    addr);
+		eth_random_addr(sdev->raw_gid);
 	}
+	addrconf_addr_eui48((u8 *)&base_dev->node_guid, sdev->raw_gid);
 
 	base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND);
 
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 296d839ee876..fadfa70853f3 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -157,7 +157,7 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
 	attr->vendor_part_id = sdev->vendor_part_id;
 
 	addrconf_addr_eui48((u8 *)&attr->sys_image_guid,
-			    sdev->netdev->dev_addr);
+			    sdev->raw_gid);
 
 	return 0;
 }
@@ -218,7 +218,7 @@ int siw_query_gid(struct ib_device *base_dev, u32 port, int idx,
 	/* subnet_prefix == interface_id == 0; */
 	memset(gid, 0, sizeof(*gid));
-	memcpy(&gid->raw[0], sdev->netdev->dev_addr, 6);
+	memcpy(gid->raw, sdev->raw_gid, ETH_ALEN);
 
 	return 0;
 }
-- cgit v1.2.3

From 91f36237b4b9bdce7610c7450a906d46704a566a Mon Sep 17 00:00:00 2001
From: Bernard Metzler
Date: Fri, 28 Jul 2023 13:44:18 +0200
Subject: RDMA/siw: Fix tx thread initialization.

Immediately removing the siw module after insertion may crash in
siw_stop_tx_thread(), if the corresponding thread has not yet had a
chance to initialize its wait queue and siw_stop_tx_thread() tries to
wake up that thread. Initializing the thread's state before spawning
it fixes this.

Reported-by: Guoqing Jiang
Signed-off-by: Bernard Metzler
Link: https://lore.kernel.org/r/20230728114418.124328-1-bmt@zurich.ibm.com
Tested-by: Guoqing Jiang
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/sw/siw/siw.h       |  3 ++-
 drivers/infiniband/sw/siw/siw_main.c  | 40 +++----------------------------
 drivers/infiniband/sw/siw/siw_qp_tx.c | 44 ++++++++++++++++++++++++++++++-----
 3 files changed, 43 insertions(+), 44 deletions(-)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 8b4a710b82bc..58dddb143b9f 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -531,11 +531,12 @@ void siw_qp_llp_data_ready(struct sock *sk);
 void siw_qp_llp_write_space(struct sock *sk);
 
 /* QP TX path functions */
+int siw_create_tx_threads(void);
+void siw_stop_tx_threads(void);
 int siw_run_sq(void *arg);
 int siw_qp_sq_process(struct siw_qp *qp);
 int siw_sq_start(struct siw_qp *qp);
 int siw_activate_tx(struct siw_qp *qp);
-void siw_stop_tx_thread(int nr_cpu);
 int siw_get_tx_cpu(struct siw_device *sdev);
 void siw_put_tx_cpu(int cpu);
 
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index f45600d169ae..d4b6e0106851 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -87,29 +87,6 @@ static void siw_device_cleanup(struct ib_device *base_dev)
 	xa_destroy(&sdev->mem_xa);
 }
 
-static int siw_create_tx_threads(void)
-{
-	int cpu, assigned = 0;
-
-	for_each_online_cpu(cpu) {
-		/* Skip HT cores */
-		if (cpu % cpumask_weight(topology_sibling_cpumask(cpu)))
-			continue;
-
-		siw_tx_thread[cpu] =
-			kthread_run_on_cpu(siw_run_sq,
-					   (unsigned long *)(long)cpu,
-					   cpu, "siw_tx/%u");
-		if (IS_ERR(siw_tx_thread[cpu])) {
-			siw_tx_thread[cpu] = NULL;
-			continue;
-		}
-
-		assigned++;
-	}
-	return assigned;
-}
-
 static int siw_dev_qualified(struct net_device *netdev)
 {
 	/*
@@ -529,7 +506,6 @@ static struct
rdma_link_ops siw_link_ops = { static __init int siw_init_module(void) { int rv; - int nr_cpu; if (SENDPAGE_THRESH < SIW_MAX_INLINE) { pr_info("siw: sendpage threshold too small: %u\n", @@ -574,12 +550,8 @@ static __init int siw_init_module(void) return 0; out_error: - for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) { - if (siw_tx_thread[nr_cpu]) { - siw_stop_tx_thread(nr_cpu); - siw_tx_thread[nr_cpu] = NULL; - } - } + siw_stop_tx_threads(); + if (siw_crypto_shash) crypto_free_shash(siw_crypto_shash); @@ -593,14 +565,8 @@ out_error: static void __exit siw_exit_module(void) { - int cpu; + siw_stop_tx_threads(); - for_each_possible_cpu(cpu) { - if (siw_tx_thread[cpu]) { - siw_stop_tx_thread(cpu); - siw_tx_thread[cpu] = NULL; - } - } unregister_netdevice_notifier(&siw_netdev_nb); rdma_link_unregister(&siw_link_ops); ib_unregister_driver(RDMA_DRIVER_SIW); diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index 7c7a51d36d0c..3ff339eceec3 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1208,10 +1208,45 @@ struct tx_task_t { static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g); -void siw_stop_tx_thread(int nr_cpu) +int siw_create_tx_threads(void) { - kthread_stop(siw_tx_thread[nr_cpu]); - wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting); + int cpu, assigned = 0; + + for_each_online_cpu(cpu) { + struct tx_task_t *tx_task; + + /* Skip HT cores */ + if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) + continue; + + tx_task = &per_cpu(siw_tx_task_g, cpu); + init_llist_head(&tx_task->active); + init_waitqueue_head(&tx_task->waiting); + + siw_tx_thread[cpu] = + kthread_run_on_cpu(siw_run_sq, + (unsigned long *)(long)cpu, + cpu, "siw_tx/%u"); + if (IS_ERR(siw_tx_thread[cpu])) { + siw_tx_thread[cpu] = NULL; + continue; + } + assigned++; + } + return assigned; +} + +void siw_stop_tx_threads(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (siw_tx_thread[cpu]) { + kthread_stop(siw_tx_thread[cpu]); + wake_up(&per_cpu(siw_tx_task_g, cpu).waiting); + siw_tx_thread[cpu] = NULL; + } + } } int siw_run_sq(void *data) @@ -1221,9 +1256,6 @@ int siw_run_sq(void *data) struct siw_qp *qp; struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu); - init_llist_head(&tx_task->active); - init_waitqueue_head(&tx_task->waiting); - while (1) { struct llist_node *fifo_list = NULL; -- cgit v1.2.3 From e0ba8ff46704fc924e2ef0451ba196cbdc0d68f2 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 20 Jun 2023 08:55:19 -0500 Subject: RDMA/rxe: Move work queue code to subroutines This patch: - Moves code to initialize a qp send work queue to a subroutine named rxe_init_sq. - Moves code to initialize a qp recv work queue to a subroutine named rxe_init_rq. - Moves initialization of qp request and response packet queues ahead of work queue initialization so that cleanup of a qp if it is not fully completed can successfully attempt to drain the packet queues without a seg fault. - Makes minor whitespace cleanups. 
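The ordering rule in the third item can be reduced to a minimal standalone
sketch. The names below (qp_init, qp_cleanup, pkts_initialized) are
illustrative stand-ins for the driver's skb_queue_head_init()/cleanup
pairing, not rxe code:

    #include <stdio.h>
    #include <stdlib.h>

    struct qp {
        int pkts_initialized;   /* stands in for skb_queue_head_init() */
        int *sq;                /* send work queue; allocation may fail */
    };

    /* the cleanup path must be safe to run on a half-created qp */
    static void qp_cleanup(struct qp *qp)
    {
        if (qp->pkts_initialized)
            printf("draining packet queue\n");
        free(qp->sq);
    }

    static int qp_init(struct qp *qp, size_t num_sqe)
    {
        /* make cleanup-visible state valid before anything that can fail */
        qp->pkts_initialized = 1;

        qp->sq = calloc(num_sqe, sizeof(*qp->sq));
        return qp->sq ? 0 : -1;
    }

    int main(void)
    {
        struct qp qp = { 0 };

        (void)qp_init(&qp, 16); /* even if this fails midway... */
        qp_cleanup(&qp);        /* ...cleanup cannot fault */
        return 0;
    }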
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20230620135519.9365-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Acked-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 159 +++++++++++++++++++++++++------------ 1 file changed, 108 insertions(+), 51 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index a569b111a9d2..28e379c108bc 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -183,13 +183,63 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, atomic_set(&qp->skb_out, 0); } +static int rxe_init_sq(struct rxe_qp *qp, struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct rxe_create_qp_resp __user *uresp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int wqe_size; + int err; + + qp->sq.max_wr = init->cap.max_send_wr; + wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), + init->cap.max_inline_data); + qp->sq.max_sge = wqe_size / sizeof(struct ib_sge); + qp->sq.max_inline = wqe_size; + wqe_size += sizeof(struct rxe_send_wqe); + + qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); + if (!qp->sq.queue) { + rxe_err_qp(qp, "Unable to allocate send queue"); + err = -ENOMEM; + goto err_out; + } + + /* prepare info for caller to mmap send queue if user space qp */ + err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata, + qp->sq.queue->buf, qp->sq.queue->buf_size, + &qp->sq.queue->ip); + if (err) { + rxe_err_qp(qp, "do_mmap_info failed, err = %d", err); + goto err_free; + } + + /* return actual capabilities to caller which may be larger + * than requested + */ + init->cap.max_send_wr = qp->sq.max_wr; + init->cap.max_send_sge = qp->sq.max_sge; + init->cap.max_inline_data = qp->sq.max_inline; + + return 0; + +err_free: + vfree(qp->sq.queue->buf); + kfree(qp->sq.queue); + qp->sq.queue = NULL; +err_out: + return err; +} + static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; - int wqe_size; - enum queue_type type; + + /* if we don't finish qp create make sure queue is valid */ + skb_queue_head_init(&qp->req_pkts); err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) @@ -204,32 +254,10 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, * (0xc000 - 0xffff). */ qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff); - qp->sq.max_wr = init->cap.max_send_wr; - - /* These caps are limited by rxe_qp_chk_cap() done by the caller */ - wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), - init->cap.max_inline_data); - qp->sq.max_sge = init->cap.max_send_sge = - wqe_size / sizeof(struct ib_sge); - qp->sq.max_inline = init->cap.max_inline_data = wqe_size; - wqe_size += sizeof(struct rxe_send_wqe); - type = QUEUE_TYPE_FROM_CLIENT; - qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, - wqe_size, type); - if (!qp->sq.queue) - return -ENOMEM; - - err = do_mmap_info(rxe, uresp ? 
&uresp->sq_mi : NULL, udata, - qp->sq.queue->buf, qp->sq.queue->buf_size, - &qp->sq.queue->ip); - - if (err) { - vfree(qp->sq.queue->buf); - kfree(qp->sq.queue); - qp->sq.queue = NULL; + err = rxe_init_sq(qp, init, udata, uresp); + if (err) return err; - } qp->req.wqe_index = queue_get_producer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); @@ -248,36 +276,65 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, return 0; } +static int rxe_init_rq(struct rxe_qp *qp, struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct rxe_create_qp_resp __user *uresp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int wqe_size; + int err; + + qp->rq.max_wr = init->cap.max_recv_wr; + qp->rq.max_sge = init->cap.max_recv_sge; + wqe_size = sizeof(struct rxe_recv_wqe) + + qp->rq.max_sge*sizeof(struct ib_sge); + + qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); + if (!qp->rq.queue) { + rxe_err_qp(qp, "Unable to allocate recv queue"); + err = -ENOMEM; + goto err_out; + } + + /* prepare info for caller to mmap recv queue if user space qp */ + err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata, + qp->rq.queue->buf, qp->rq.queue->buf_size, + &qp->rq.queue->ip); + if (err) { + rxe_err_qp(qp, "do_mmap_info failed, err = %d", err); + goto err_free; + } + + /* return actual capabilities to caller which may be larger + * than requested + */ + init->cap.max_recv_wr = qp->rq.max_wr; + + return 0; + +err_free: + vfree(qp->rq.queue->buf); + kfree(qp->rq.queue); + qp->rq.queue = NULL; +err_out: + return err; +} + static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; - int wqe_size; - enum queue_type type; + + /* if we don't finish qp create make sure queue is valid */ + skb_queue_head_init(&qp->resp_pkts); if (!qp->srq) { - qp->rq.max_wr = init->cap.max_recv_wr; - qp->rq.max_sge = init->cap.max_recv_sge; - - wqe_size = rcv_wqe_size(qp->rq.max_sge); - - type = QUEUE_TYPE_FROM_CLIENT; - qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, - wqe_size, type); - if (!qp->rq.queue) - return -ENOMEM; - - err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata, - qp->rq.queue->buf, qp->rq.queue->buf_size, - &qp->rq.queue->ip); - if (err) { - vfree(qp->rq.queue->buf); - kfree(qp->rq.queue); - qp->rq.queue = NULL; + err = rxe_init_rq(qp, init, udata, uresp); + if (err) return err; - } } rxe_init_task(&qp->resp.task, qp, rxe_responder); @@ -307,10 +364,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, if (srq) rxe_get(srq); - qp->pd = pd; - qp->rcq = rcq; - qp->scq = scq; - qp->srq = srq; + qp->pd = pd; + qp->rcq = rcq; + qp->scq = scq; + qp->srq = srq; atomic_inc(&rcq->num_wq); atomic_inc(&scq->num_wq); -- cgit v1.2.3 From 5993b75d0bc71cd2b441d174b028fc36180f032c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Tue, 20 Jun 2023 08:55:21 -0500 Subject: RDMA/rxe: Fix unsafe drain work queue code If create_qp does not fully succeed it is possible for qp cleanup code to attempt to drain the send or recv work queues before the queues have been created causing a seg fault. This patch checks to see if the queues exist before attempting to drain them. 
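The guard has this minimal shape (simplified, hypothetical types; the
actual hunks follow below):

    #include <stddef.h>

    struct rxe_queue { int head; };
    struct qp { struct rxe_queue *sq_queue; };

    static void flush_send_queue(struct qp *qp)
    {
        /* send queue never got created: nothing to drain */
        if (!qp->sq_queue)
            return;

        qp->sq_queue->head = 0; /* stands in for flushing each wqe */
    }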
Link: https://lore.kernel.org/r/20230620135519.9365-3-rpearsonhpe@gmail.com
Reported-by: syzbot+2da1965168e7dbcba136@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/linux-rdma/00000000000012d89205fe7cfe00@google.com/raw
Fixes: 49dc9c1f0c7e ("RDMA/rxe: Cleanup reset state handling in rxe_resp.c")
Fixes: fbdeb828a21f ("RDMA/rxe: Cleanup error state handling in rxe_comp.c")
Signed-off-by: Bob Pearson
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++++
 drivers/infiniband/sw/rxe/rxe_resp.c | 4 ++++
 2 files changed, 8 insertions(+)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 5111735aafae..d0bdc2d8adc8 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -597,6 +597,10 @@ static void flush_send_queue(struct rxe_qp *qp, bool notify)
 	struct rxe_queue *q = qp->sq.queue;
 	int err;
 
+	/* send queue never got created. nothing to do. */
+	if (!qp->sq.queue)
+		return;
+
 	while ((wqe = queue_head(q, q->type))) {
 		if (notify) {
 			err = flush_send_wqe(qp, wqe);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 64c64f5f36a8..da470a925efc 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -1469,6 +1469,10 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify)
 		return;
 	}
 
+	/* recv queue not created. nothing to do. */
+	if (!qp->rq.queue)
+		return;
+
 	while ((wqe = queue_head(q, q->type))) {
 		if (notify) {
 			err = flush_recv_wqe(qp, wqe);
-- cgit v1.2.3

From cc28f351155def8db209647f2e20a59a7080825b Mon Sep 17 00:00:00 2001
From: Bob Pearson
Date: Tue, 20 Jun 2023 09:01:43 -0500
Subject: RDMA/rxe: Fix rxe_modify_srq

This patch corrects an error in rxe_modify_srq: if the caller changes
the srq size, the actual new value is not returned to the caller, even
though it may be larger than what was requested. Additionally, it open
codes the subroutine rcv_wqe_size(), which added very little value, and
makes some whitespace changes.
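A standalone sketch of why the actual value can exceed the request,
assuming the power-of-two sizing used by the rxe queue code (names
illustrative, not driver code):

    #include <stdio.h>

    /* queue slots are sized to a power of two, as in rxe_queue_init() */
    static unsigned int roundup_pow_of_two(unsigned int v)
    {
        unsigned int p = 1;

        while (p < v)
            p <<= 1;
        return p;
    }

    int main(void)
    {
        unsigned int requested = 100;
        unsigned int actual = roundup_pow_of_two(requested);

        /* the fix: report 'actual' back through attr->max_wr */
        printf("requested %u, actual %u\n", requested, actual);
        return 0;
    }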
Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20230620140142.9452-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 6 ---- drivers/infiniband/sw/rxe/rxe_srq.c | 60 ++++++++++++++++++++++--------------- 2 files changed, 36 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 666e06a82bc9..4d2a8ef52c85 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -136,12 +136,6 @@ static inline int qp_mtu(struct rxe_qp *qp) return IB_MTU_4096; } -static inline int rcv_wqe_size(int max_sge) -{ - return sizeof(struct rxe_recv_wqe) + - max_sge * sizeof(struct ib_sge); -} - void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 27ca82ec0826..3661cb627d28 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -45,40 +45,41 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp) { - int err; - int srq_wqe_size; struct rxe_queue *q; - enum queue_type type; + int wqe_size; + int err; - srq->ibsrq.event_handler = init->event_handler; - srq->ibsrq.srq_context = init->srq_context; - srq->limit = init->attr.srq_limit; - srq->srq_num = srq->elem.index; - srq->rq.max_wr = init->attr.max_wr; - srq->rq.max_sge = init->attr.max_sge; + srq->ibsrq.event_handler = init->event_handler; + srq->ibsrq.srq_context = init->srq_context; + srq->limit = init->attr.srq_limit; + srq->srq_num = srq->elem.index; + srq->rq.max_wr = init->attr.max_wr; + srq->rq.max_sge = init->attr.max_sge; - srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); + wqe_size = sizeof(struct rxe_recv_wqe) + + srq->rq.max_sge*sizeof(struct ib_sge); spin_lock_init(&srq->rq.producer_lock); spin_lock_init(&srq->rq.consumer_lock); - type = QUEUE_TYPE_FROM_CLIENT; - q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); + q = rxe_queue_init(rxe, &srq->rq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); if (!q) { rxe_dbg_srq(srq, "Unable to allocate queue\n"); - return -ENOMEM; + err = -ENOMEM; + goto err_out; } - srq->rq.queue = q; - err = do_mmap_info(rxe, uresp ? 
&uresp->mi : NULL, udata, q->buf, q->buf_size, &q->ip);
 	if (err) {
-		vfree(q->buf);
-		kfree(q);
-		return err;
+		rxe_dbg_srq(srq, "Unable to init mmap info for caller\n");
+		goto err_free;
 	}
 
+	srq->rq.queue = q;
+	init->attr.max_wr = srq->rq.max_wr;
+
 	if (uresp) {
 		if (copy_to_user(&uresp->srq_num, &srq->srq_num,
 				 sizeof(uresp->srq_num))) {
@@ -88,6 +89,12 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
 	}
 
 	return 0;
+
+err_free:
+	vfree(q->buf);
+	kfree(q);
+err_out:
+	return err;
 }
 
 int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
@@ -145,9 +152,10 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 		      struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
 		      struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata)
 {
-	int err;
 	struct rxe_queue *q = srq->rq.queue;
 	struct mminfo __user *mi = NULL;
+	int wqe_size;
+	int err;
 
 	if (mask & IB_SRQ_MAX_WR) {
 		/*
@@ -156,12 +164,16 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 		 */
 		mi = u64_to_user_ptr(ucmd->mmap_info_addr);
 
-		err = rxe_queue_resize(q, &attr->max_wr,
-				       rcv_wqe_size(srq->rq.max_sge), udata, mi,
-				       &srq->rq.producer_lock,
+		wqe_size = sizeof(struct rxe_recv_wqe) +
+				srq->rq.max_sge*sizeof(struct ib_sge);
+
+		err = rxe_queue_resize(q, &attr->max_wr, wqe_size,
+				       udata, mi, &srq->rq.producer_lock,
 				       &srq->rq.consumer_lock);
 		if (err)
-			goto err2;
+			goto err_free;
+
+		srq->rq.max_wr = attr->max_wr;
 	}
 
 	if (mask & IB_SRQ_LIMIT)
@@ -169,7 +181,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 
 	return 0;
 
-err2:
+err_free:
 	rxe_queue_cleanup(q);
 	srq->rq.queue = NULL;
 	return err;
-- cgit v1.2.3

From 5d122db2ff80cd2aed4dcd630befb56b51ddf947 Mon Sep 17 00:00:00 2001
From: Bob Pearson
Date: Fri, 21 Jul 2023 15:07:49 -0500
Subject: RDMA/rxe: Fix incomplete state save in rxe_requester

If a send packet is dropped by the IP layer in rxe_requester() the call
to rxe_xmit_packet() can fail with err == -EAGAIN. To recover, the
state of the wqe is restored to the state before the packet was sent so
it can be resent. However, the routines that save and restore the state
miss a significant part of the variable state in the wqe: the dma
struct, which is used to progress through the sge table. Also, the
state is not saved before the packet is built, which modifies the dma
struct.

Under heavy stress testing, with many QPs on a fast node sending large
messages to a slow node, dropped packets are observed and the resent
packets are corrupted because the dma struct was not restored. This
patch fixes this behavior and allows the test cases to succeed.
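The underlying invariant, as a standalone sketch: every field the send
path mutates must be captured before the packet is built. (The patch
itself copies the fields individually; the types below are illustrative
only, not rxe's own.)

    #include <assert.h>

    struct dma_info { unsigned int cur_sge, sge_offset, resid; };

    struct send_wqe {
        int state;
        unsigned int first_psn, last_psn;
        struct dma_info dma;    /* also mutated while a packet is built */
    };

    /* rollback is only sound if every mutated field was saved */
    static void save_state(const struct send_wqe *wqe, struct send_wqe *rb)
    {
        *rb = *wqe;
    }

    static void rollback_state(struct send_wqe *wqe, const struct send_wqe *rb)
    {
        *wqe = *rb;
    }

    int main(void)
    {
        struct send_wqe wqe = { .dma = { .resid = 4096 } }, rb;

        save_state(&wqe, &rb);          /* before building the packet */
        wqe.dma.resid = 0;              /* packet build consumes dma state */
        rollback_state(&wqe, &rb);      /* dropped packet: restore for resend */
        assert(wqe.dma.resid == 4096);
        return 0;
    }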
Fixes: 3050b9985024 ("IB/rxe: Fix race condition between requester and completer")
Link: https://lore.kernel.org/r/20230721200748.4604-1-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/sw/rxe/rxe_req.c | 45 ++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 20 deletions(-)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 2171f19494bc..d8c41fd626a9 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -578,10 +578,11 @@ static void save_state(struct rxe_send_wqe *wqe,
 		       struct rxe_send_wqe *rollback_wqe,
 		       u32 *rollback_psn)
 {
-	rollback_wqe->state = wqe->state;
+	rollback_wqe->state     = wqe->state;
 	rollback_wqe->first_psn = wqe->first_psn;
-	rollback_wqe->last_psn = wqe->last_psn;
-	*rollback_psn = qp->req.psn;
+	rollback_wqe->last_psn  = wqe->last_psn;
+	rollback_wqe->dma       = wqe->dma;
+	*rollback_psn           = qp->req.psn;
 }
 
 static void rollback_state(struct rxe_send_wqe *wqe,
@@ -589,10 +590,11 @@ static void rollback_state(struct rxe_send_wqe *wqe,
 			   struct rxe_send_wqe *rollback_wqe,
 			   u32 rollback_psn)
 {
-	wqe->state = rollback_wqe->state;
+	wqe->state     = rollback_wqe->state;
 	wqe->first_psn = rollback_wqe->first_psn;
-	wqe->last_psn = rollback_wqe->last_psn;
-	qp->req.psn = rollback_psn;
+	wqe->last_psn  = rollback_wqe->last_psn;
+	wqe->dma       = rollback_wqe->dma;
+	qp->req.psn    = rollback_psn;
 }
 
 static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
@@ -797,6 +799,9 @@ int rxe_requester(struct rxe_qp *qp)
 	pkt.mask = rxe_opcode[opcode].mask;
 	pkt.wqe = wqe;
 
+	/* save wqe state before we build and send packet */
+	save_state(wqe, qp, &rollback_wqe, &rollback_psn);
+
 	av = rxe_get_av(&pkt, &ah);
 	if (unlikely(!av)) {
 		rxe_dbg_qp(qp, "Failed no address vector\n");
@@ -829,29 +834,29 @@ int rxe_requester(struct rxe_qp *qp)
 	if (ah)
 		rxe_put(ah);
 
-	/*
-	 * To prevent a race on wqe access between requester and completer,
-	 * wqe members state and psn need to be set before calling
-	 * rxe_xmit_packet().
-	 * Otherwise, completer might initiate an unjustified retry flow.
-	 */
-	save_state(wqe, qp, &rollback_wqe, &rollback_psn);
+	/* update wqe state as though we had sent it */
 	update_wqe_state(qp, wqe, &pkt);
 	update_wqe_psn(qp, wqe, &pkt, payload);
 
 	err = rxe_xmit_packet(qp, &pkt, skb);
 	if (err) {
-		qp->need_req_skb = 1;
+		if (err != -EAGAIN) {
+			wqe->status = IB_WC_LOC_QP_OP_ERR;
+			goto err;
+		}
 
+		/* the packet was dropped so reset wqe to the state
+		 * before we sent it so we can try to resend
+		 */
 		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
 
-		if (err == -EAGAIN) {
-			rxe_sched_task(&qp->req.task);
-			goto exit;
-		}
+		/* force a delay until the dropped packet is freed and
+		 * the send queue is drained below the low water mark
+		 */
+		qp->need_req_skb = 1;
 
-		wqe->status = IB_WC_LOC_QP_OP_ERR;
-		goto err;
+		rxe_sched_task(&qp->req.task);
+		goto exit;
 	}
 
 	update_state(qp, &pkt);
-- cgit v1.2.3

From b056327bee09e6b86683d3f709a438ccd6031d72 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang
Date: Mon, 21 Aug 2023 21:32:53 +0800
Subject: RDMA/siw: Balance the reference of cep->kref in the error path

siw_connect() can go to the error path after the cep is allocated
successfully:

1. If siw_cm_alloc_work() returns failure. In this case the socket is
not associated with the cep, so siw_cep_put() can't be called by
siw_socket_disassoc(). We need to call siw_cep_put() twice, since
cep->kref is increased once after it was initialized.

2.
If siw_cm_queue_work() can't find a work, which means siw_cep_get()
is not called in siw_cm_queue_work(), cep->kref is increased twice: by
siw_cep_get() and when the socket is associated with the cep after it
was initialized. So we need to call siw_cep_put() three times (one of
them in siw_socket_disassoc()).

3. If siw_send_mpareqrep() returns an error. This scenario is similar
to 2.

So we need to remove one siw_cep_put() in the error path.

Fixes: 6c52fdc244b5 ("rdma/siw: connection management")
Signed-off-by: Guoqing Jiang
Link: https://lore.kernel.org/r/20230821133255.31111-2-guoqing.jiang@linux.dev
Acked-by: Bernard Metzler
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/sw/siw/siw_cm.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index da530c0404da..a2605178f4ed 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1501,7 +1501,6 @@ error:
 		cep->cm_id = NULL;
 		id->rem_ref(id);
 
-		siw_cep_put(cep);
 		qp->cep = NULL;
 		siw_cep_put(cep);
 
-- cgit v1.2.3

From bee024d20451e4ce04ea30099cad09f7f75d288b Mon Sep 17 00:00:00 2001
From: Guoqing Jiang
Date: Mon, 21 Aug 2023 21:32:54 +0800
Subject: RDMA/siw: Correct wrong debug message

We need to print num_sle first and then pbl->max_buf, to match the
condition. Also replace mem->pbl with pbl while at it.

Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
Signed-off-by: Guoqing Jiang
Link: https://lore.kernel.org/r/20230821133255.31111-3-guoqing.jiang@linux.dev
Acked-by: Bernard Metzler
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/sw/siw/siw_verbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index fadfa70853f3..fdbef3254e30 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1494,7 +1494,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle,
 
 	if (pbl->max_buf < num_sle) {
 		siw_dbg_mem(mem, "too many SGE's: %d > %d\n",
-			    mem->pbl->max_buf, num_sle);
+			    num_sle, pbl->max_buf);
 		return -ENOMEM;
 	}
 	for_each_sg(sl, slp, num_sle, i) {
-- cgit v1.2.3

From 9dfccb6d0d3d13347c61ff0136b22d5d772d2075 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang
Date: Mon, 21 Aug 2023 21:32:55 +0800
Subject: RDMA/siw: Call llist_reverse_order in siw_run_sq

We can call llist_reverse_order() to get the fifo list.

Signed-off-by: Guoqing Jiang
Link: https://lore.kernel.org/r/20230821133255.31111-4-guoqing.jiang@linux.dev
Acked-by: Bernard Metzler
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/sw/siw/siw_qp_tx.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'drivers/infiniband/sw')

diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 3ff339eceec3..60b6a4135961 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -1271,13 +1271,7 @@ int siw_run_sq(void *data)
 		 * llist_del_all returns a list with newest entry first.
 		 * Re-order list for fairness among QP's.
*/ - while (active) { - struct llist_node *tmp = active; - - active = llist_next(active); - tmp->next = fifo_list; - fifo_list = tmp; - } + fifo_list = llist_reverse_order(active); while (fifo_list) { qp = container_of(fifo_list, struct siw_qp, tx_list); fifo_list = llist_next(fifo_list); -- cgit v1.2.3 From 6812e06999054792e13193666989dbdc01642625 Mon Sep 17 00:00:00 2001 From: Rohit Chavan Date: Tue, 22 Aug 2023 14:43:04 +0530 Subject: RDMA/rxe: Fix redundant break statement in switch-case. Removed unreachable break statement after return. Signed-off-by: Rohit Chavan Link: https://lore.kernel.org/r/20230822091304.7312-1-roheetchavan@gmail.com Acked-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/sw/rxe/rxe_verbs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 903f0b71447e..48f86839d36a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -798,7 +798,6 @@ static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, rxe_err_qp(qp, "unsupported wr opcode %d", wr->opcode); return -EINVAL; - break; } } -- cgit v1.2.3