From 99f80d2f5fb6d4165186390ecba83952803b667b Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 10 Oct 2016 06:14:39 -0700 Subject: IB/hfi1: Optimize lkey validation structures Profiling shows that the key validation is susceptible to cache line trading when accessing the lkey table. Fix by separating out the read mostly fields from the write fields. In addition the shift amount, which is function of the lkey table size, is precomputed and stored with the table pointer. Since both the shift and table pointer are in the same read mostly cacheline, this saves a cache line in this hot path. Reviewed-by: Sebastian Sanchez Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 46b64970058e..4acf179adf3b 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -84,6 +84,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) lkey_table_size = rdi->dparms.lkey_table_size; } rdi->lkey_table.max = 1 << lkey_table_size; + rdi->lkey_table.shift = 32 - lkey_table_size; lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); rdi->lkey_table.table = (struct rvt_mregion __rcu **) vmalloc_node(lk_tab_size, rdi->dparms.node); @@ -774,7 +775,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_mregion *mr; unsigned n, m; size_t off; - struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); /* * We use LKEY == zero for kernel virtual addresses @@ -782,6 +782,8 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, */ rcu_read_lock(); if (sge->lkey == 0) { + struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); + if (pd->user) goto bail; mr = rcu_dereference(dev->dma_mr); @@ -798,8 +800,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, isge->n = 0; goto ok; } - mr = rcu_dereference( - rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) goto bail; @@ -899,8 +900,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, goto ok; } - mr = rcu_dereference( - rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); + mr = rcu_dereference(rkt->table[rkey >> rkt->shift]); if (unlikely(!mr || atomic_read(&mr->lkey_invalid) || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) goto bail; -- cgit v1.2.3 From 907610bfdf1a7f3d173a9593fde70f67d63476d1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 3 Nov 2016 16:44:23 +0200 Subject: IB/rxe: Remove and fix debug prints after allocation failure The prints after [k|v][m|z|c]alloc() functions are not needed, because in case of failure, allocator will print their internal error prints anyway. Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_pool.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 6bac0717c540..d723947a8542 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -180,7 +180,6 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) size = BITS_TO_LONGS(max - min + 1) * sizeof(long); pool->table = kmalloc(size, GFP_KERNEL); if (!pool->table) { - pr_warn("no memory for bit table\n"); err = -ENOMEM; goto out; } -- cgit v1.2.3 From cf4c2f8c9d0483f7585883d3f6733ebf09ea837c Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 7 Dec 2016 19:32:47 -0800 Subject: IB/rdmavt: Fix trace hierarchy Split rdmavt traces into separate files to preserve the original hierarchy since only one trace sub system may now be defined per header file. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/trace.h | 140 +------------------------------ drivers/infiniband/sw/rdmavt/trace_qp.h | 96 +++++++++++++++++++++ drivers/infiniband/sw/rdmavt/trace_rvt.h | 81 ++++++++++++++++++ drivers/infiniband/sw/rdmavt/trace_tx.h | 132 +++++++++++++++++++++++++++++ 4 files changed, 312 insertions(+), 137 deletions(-) create mode 100644 drivers/infiniband/sw/rdmavt/trace_qp.h create mode 100644 drivers/infiniband/sw/rdmavt/trace_rvt.h create mode 100644 drivers/infiniband/sw/rdmavt/trace_tx.h (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 6c0457db5499..11a2afbc8e76 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -45,143 +45,9 @@ * */ -#undef TRACE_SYSTEM_VAR -#define TRACE_SYSTEM_VAR rdmavt - -#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) -#define __RDMAVT_TRACE_H - -#include -#include - -#include -#include - #define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) #define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi)) -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rdmavt - -TRACE_EVENT(rvt_dbg, - TP_PROTO(struct rvt_dev_info *rdi, - const char *msg), - TP_ARGS(rdi, msg), - TP_STRUCT__entry( - RDI_DEV_ENTRY(rdi) - __string(msg, msg) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(rdi); - __assign_str(msg, msg); - ), - TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) -); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_qphash -DECLARE_EVENT_CLASS(rvt_qphash_template, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, bucket) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->bucket = bucket; - ), - TP_printk( - "[%s] qpn 0x%x bucket %u", - __get_str(dev), - __entry->qpn, - __entry->bucket - ) -); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -#undef TRACE_SYSTEM -#define TRACE_SYSTEM rvt_tx - -#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } -#define show_wr_opcode(opcode) \ -__print_symbolic(opcode, \ - wr_opcode_name(RDMA_WRITE), \ - wr_opcode_name(RDMA_WRITE_WITH_IMM), \ - wr_opcode_name(SEND), \ - wr_opcode_name(SEND_WITH_IMM), \ - wr_opcode_name(RDMA_READ), \ - wr_opcode_name(ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ - wr_opcode_name(LSO), \ - wr_opcode_name(SEND_WITH_INV), \ - wr_opcode_name(RDMA_READ_WITH_INV), \ - wr_opcode_name(LOCAL_INV), \ - wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ - wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) - -#define POS_PRN \ -"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" - -TRACE_EVENT( - rvt_post_one_wr, - TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), - TP_ARGS(qp, wqe), - TP_STRUCT__entry( - RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) - __field(u64, wr_id) - __field(u32, qpn) - __field(u32, psn) - __field(u32, lpsn) - __field(u32, length) - __field(u32, opcode) - __field(u32, size) - __field(u32, avail) - __field(u32, head) - __field(u32, last) - ), - TP_fast_assign( - RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) - __entry->wr_id = wqe->wr.wr_id; - __entry->qpn = qp->ibqp.qp_num; - __entry->psn = wqe->psn; - __entry->lpsn = wqe->lpsn; - __entry->length = wqe->length; - __entry->opcode = wqe->wr.opcode; - __entry->size = qp->s_size; - __entry->avail = qp->s_avail; - __entry->head = qp->s_head; - __entry->last = qp->s_last; - ), - TP_printk( - POS_PRN, - __get_str(dev), - __entry->wr_id, - __entry->qpn, - __entry->psn, - __entry->lpsn, - __entry->length, - __entry->opcode, show_wr_opcode(__entry->opcode), - __entry->size, - __entry->avail, - __entry->head, - __entry->last - ) -); - -#endif /* __RDMAVT_TRACE_H */ - -#undef TRACE_INCLUDE_PATH -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace -#include +#include "trace_rvt.h" +#include "trace_qp.h" +#include "trace_tx.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h new file mode 100644 index 000000000000..4c77a3119bda --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_qp.h @@ -0,0 +1,96 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_QP_H + +#include +#include + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_qp + +DECLARE_EVENT_CLASS(rvt_qphash_template, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, bucket) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->bucket = bucket; + ), + TP_printk( + "[%s] qpn 0x%x bucket %u", + __get_str(dev), + __entry->qpn, + __entry->bucket + ) +); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + + +#endif /* __RVT_TRACE_QP_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_qp +#include + diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h new file mode 100644 index 000000000000..746f33461d9a --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h @@ -0,0 +1,81 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_RVT_H + +#include +#include + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt + +TRACE_EVENT(rvt_dbg, + TP_PROTO(struct rvt_dev_info *rdi, + const char *msg), + TP_ARGS(rdi, msg), + TP_STRUCT__entry( + RDI_DEV_ENTRY(rdi) + __string(msg, msg) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(rdi); + __assign_str(msg, msg); + ), + TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) +); + +#endif /* __RVT_TRACE_MISC_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_rvt +#include + diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h new file mode 100644 index 000000000000..0e03173662d8 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_tx.h @@ -0,0 +1,132 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_TX_H + +#include +#include + +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_tx + +#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } +#define show_wr_opcode(opcode) \ +__print_symbolic(opcode, \ + wr_opcode_name(RDMA_WRITE), \ + wr_opcode_name(RDMA_WRITE_WITH_IMM), \ + wr_opcode_name(SEND), \ + wr_opcode_name(SEND_WITH_IMM), \ + wr_opcode_name(RDMA_READ), \ + wr_opcode_name(ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ + wr_opcode_name(LSO), \ + wr_opcode_name(SEND_WITH_INV), \ + wr_opcode_name(RDMA_READ_WITH_INV), \ + wr_opcode_name(LOCAL_INV), \ + wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) + +#define POS_PRN \ +"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" + +TRACE_EVENT( + rvt_post_one_wr, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), + TP_ARGS(qp, wqe), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, psn) + __field(u32, lpsn) + __field(u32, length) + __field(u32, opcode) + __field(u32, size) + __field(u32, avail) + __field(u32, head) + __field(u32, last) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->psn = wqe->psn; + __entry->lpsn = wqe->lpsn; + __entry->length = wqe->length; + __entry->opcode = wqe->wr.opcode; + __entry->size = qp->s_size; + __entry->avail = qp->s_avail; + __entry->head = qp->s_head; + __entry->last = qp->s_last; + ), + TP_printk( + POS_PRN, + __get_str(dev), + __entry->wr_id, + __entry->qpn, + __entry->psn, + __entry->lpsn, + __entry->length, + __entry->opcode, show_wr_opcode(__entry->opcode), + __entry->size, + __entry->avail, + __entry->head, + __entry->last + ) +); + +#endif /* __RVT_TRACE_TX_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_tx +#include + -- cgit v1.2.3 From fcb29a6668a254104d38c0132d1853d0644af066 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 7 Dec 2016 19:33:20 -0800 Subject: IB/rdmavt: Add trace of MR segs Add tracing of MR segment information. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 4 ++ drivers/infiniband/sw/rdmavt/trace.h | 1 + drivers/infiniband/sw/rdmavt/trace_mr.h | 112 ++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) create mode 100644 drivers/infiniband/sw/rdmavt/trace_mr.h (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 4acf179adf3b..562cf78b6ae9 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -51,6 +51,7 @@ #include #include "vt.h" #include "mr.h" +#include "trace.h" /** * rvt_driver_mr_init - Init MR resources per driver @@ -403,6 +404,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } mr->mr.map[m]->segs[n].vaddr = vaddr; mr->mr.map[m]->segs[n].length = umem->page_size; + trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size); n++; if (n == RVT_SEGSZ) { m++; @@ -507,6 +509,7 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); mr->mr.length += ps; return 0; @@ -693,6 +696,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, for (i = 0; i < list_len; i++) { fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i]; fmr->mr.map[m]->segs[n].length = ps; + trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps); if (++n == RVT_SEGSZ) { m++; n = 0; diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 11a2afbc8e76..e2d23acb6a7d 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -51,3 +51,4 @@ #include "trace_rvt.h" #include "trace_qp.h" #include "trace_tx.h" +#include "trace_mr.h" diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h new file mode 100644 index 000000000000..3318a6c36373 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -0,0 +1,112 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ) +#define __RVT_TRACE_MR_H + +#include +#include + +#include +#include +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_mr +DECLARE_EVENT_CLASS( + rvt_mr_template, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device)) + __field(void *, vaddr) + __field(struct page *, page) + __field(size_t, len) + __field(u32, lkey) + __field(u16, m) + __field(u16, n) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device)); + __entry->vaddr = v; + __entry->page = virt_to_page(v); + __entry->m = m; + __entry->n = n; + __entry->len = len; + ), + TP_printk( + "[%s] vaddr %p page %p m %u n %u len %ld", + __get_str(dev), + __entry->vaddr, + __entry->page, + __entry->m, + __entry->n, + __entry->len + ) +); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_page_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_fmr_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +DEFINE_EVENT( + rvt_mr_template, rvt_mr_user_seg, + TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len), + TP_ARGS(mr, m, n, v, len)); + +#endif /* __RVT_TRACE_MR_H */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_mr +#include -- cgit v1.2.3 From b44980f8794faf3561d970fe8c05e34765fbdc9b Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 7 Dec 2016 19:33:46 -0800 Subject: IB/hfi1: Replace qp->refcount release code with standard driver wrapper Some parts of the code don't use the standard release wrapper rvt_put_qp() for decrementing and testing the refcount to then try to use a resource. Replace this code with the standard driver wrapper. Fixes: Commit 4d6f85c3fa55 ("IB/rdmavt, IB/qib, IB/hfi1: Use new QP put get routines") Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/driver.c | 3 +-- drivers/infiniband/sw/rdmavt/mcast.c | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 6563e4d38b80..d4261163bd25 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -793,8 +793,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet) hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 983d319ac976..05c8c2afb0e3 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -81,7 +81,7 @@ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) goto bail; mqp->qp = qp; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); bail: return mqp; @@ -92,8 +92,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) struct rvt_qp *qp = mqp->qp; /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); kfree(mqp); } -- cgit v1.2.3 From f84dfa26e61c6f31a3a0366532296911df763170 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Wed, 7 Dec 2016 19:33:53 -0800 Subject: IB/hfi1: Use reference count wrapper for MRs Some parts of the code don't use the standard driver wrapper for memory region reference counters. Use the standard driver wrapper throughout the code. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 562cf78b6ae9..52fd15276ee6 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -793,7 +793,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, mr = rcu_dereference(dev->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); isge->mr = mr; @@ -814,7 +814,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, off + sge->length > mr->length || (mr->access_flags & acc) != acc)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; @@ -892,7 +892,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, mr = rcu_dereference(rdi->dma_mr); if (!mr) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); sge->mr = mr; @@ -913,7 +913,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, if (unlikely(vaddr < mr->iova || off + len > mr->length || (mr->access_flags & acc) == 0)) goto bail; - atomic_inc(&mr->refcount); + rvt_get_mr(mr); rcu_read_unlock(); off += mr->offset; -- cgit v1.2.3 From f2dc9cdce83c4aac2f8b6c803b0327df1c7d44a6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 7 Dec 2016 19:34:06 -0800 Subject: IB/rdmavt: Add a send completion helper This is for use by client drivers to drive send completions into a CQ. A new exported table allows for the mapping of ib_wr_opcode into a ib_wc_opcode. Reviewed-by: Ashutosh Dixit Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 17 +++++++++++++++++ include/rdma/rdmavt_qp.h | 40 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 56 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 6500c3b5a89c..9e14addd690c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -76,6 +76,23 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { }; EXPORT_SYMBOL(ib_rvt_state_ops); +/* + * Translate ib_wr_opcode into ib_wc_opcode. + */ +const enum ib_wc_opcode ib_rvt_wc_opcode[] = { + [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, + [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, + [IB_WR_SEND] = IB_WC_SEND, + [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, + [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, + [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, + [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, + [IB_WR_SEND_WITH_INV] = IB_WC_SEND, + [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, + [IB_WR_REG_MR] = IB_WC_REG_MR +}; +EXPORT_SYMBOL(ib_rvt_wc_opcode); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 2c5183ef0243..d78e99cf6c11 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -51,6 +51,7 @@ #include #include #include +#include /* * Atomic bit definitions for r_aflags. */ @@ -527,7 +528,44 @@ static inline void rvt_qp_wqe_unreserve( } } -extern const int ib_rvt_state_ops[]; +extern const enum ib_wc_opcode ib_rvt_wc_opcode[]; + +/** + * rvt_qp_swqe_complete() - insert send completion + * @qp - the qp + * @wqe - the send wqe + * @status - completion status + * + * Insert a send completion into the completion + * queue if the qp indicates it should be done. + * + * See IBTA 10.7.3.1 for info on completion + * control. + */ +static inline void rvt_qp_swqe_complete( + struct rvt_qp *qp, + struct rvt_swqe *wqe, + enum ib_wc_status status) +{ + if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED)) + return; + if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) || + (wqe->wr.send_flags & IB_SEND_SIGNALED) || + status != IB_WC_SUCCESS) { + struct ib_wc wc; + + memset(&wc, 0, sizeof(wc)); + wc.wr_id = wqe->wr.wr_id; + wc.status = status; + wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode]; + wc.qp = &qp->ibqp; + wc.byte_len = wqe->length; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, + status != IB_WC_SUCCESS); + } +} + +extern const int ib_rvt_state_ops[]; struct rvt_dev_info; int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); -- cgit v1.2.3 From a0fa72683e78979ef1123d679b1c40ae28bd9096 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Sep 2016 13:57:26 +0200 Subject: IB/rxe: avoid putting a large struct rxe_qp on stack A race condition fix added an rxe_qp structure to the stack in order to be able to perform rollback in rxe_requester(), but the structure is large enough to trigger the warning for possible stack overflow: drivers/infiniband/sw/rxe/rxe_req.c: In function 'rxe_requester': drivers/infiniband/sw/rxe/rxe_req.c:757:1: error: the frame size of 2064 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] This changes the rollback function to only save the psn inside the qp, which is the only field we access in the rollback_qp anyway. Fixes: 3050b9985024 ("IB/rxe: Fix race condition between requester and completer") Signed-off-by: Arnd Bergmann Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_req.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 832846b73ea0..205222909e53 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -548,23 +548,23 @@ static void update_wqe_psn(struct rxe_qp *qp, static void save_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 *rollback_psn) { rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; rollback_wqe->last_psn = wqe->last_psn; - rollback_qp->req.psn = qp->req.psn; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_qp *qp, struct rxe_send_wqe *rollback_wqe, - struct rxe_qp *rollback_qp) + u32 rollback_psn) { wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_qp->req.psn; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -593,8 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - struct rxe_qp rollback_qp; struct rxe_send_wqe rollback_wqe; + u32 rollback_psn; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -718,7 +718,7 @@ next_wqe: * rxe_xmit_packet(). * Otherwise, completer might initiate an unjustified retry flow. */ - save_state(wqe, qp, &rollback_wqe, &rollback_qp); + save_state(wqe, qp, &rollback_wqe, &rollback_psn); update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); @@ -726,7 +726,7 @@ next_wqe: qp->need_req_skb = 1; kfree_skb(skb); - rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); + rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); -- cgit v1.2.3 From 4ac4707102d9ea1ffc9b5735891f6c5ee3d236e5 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 29 Oct 2016 16:19:33 +0000 Subject: IB/rxe: Use DEFINE_SPINLOCK() for spinlock spinlock can be initialized automatically with DEFINE_SPINLOCK() rather than explicitly calling spin_lock_init(). Signed-off-by: Wei Yongjun Reviewed-by: Leon Romanosky Reviewed-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_net.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index b8258e4f0aea..4cb63780fa74 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -46,7 +46,7 @@ #include "rxe_loc.h" static LIST_HEAD(rxe_dev_list); -static spinlock_t dev_list_lock; /* spinlock for device list */ +static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */ struct rxe_dev *net_to_rxe(struct net_device *ndev) { @@ -663,8 +663,6 @@ struct notifier_block rxe_net_notifier = { int rxe_net_ipv4_init(void) { - spin_lock_init(&dev_list_lock); - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { @@ -680,8 +678,6 @@ int rxe_net_ipv6_init(void) { #if IS_ENABLED(CONFIG_IPV6) - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), true); if (IS_ERR(recv_sockets.sk6)) { -- cgit v1.2.3 From 95db9d05b717b1ec242799103c09acda8d728974 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Nov 2016 14:00:05 +0300 Subject: IB/rxe: Remove unneeded cast in rxe_srq_from_attr() It makes me nervous when we cast pointer parameters. I would estimate that around 50% of the time, it indicates a bug. Here the cast is not needed becaue u32 and and unsigned int are the same thing. Removing the cast makes the code more robust and future proof in case any of the types change. Signed-off-by: Dan Carpenter Reviewed-by: Leon Romanovsky Reviewed-by: Yuval Shaia Acked-by: Moni Shoua Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 2a6e3cd2d4e8..efc832a2d7c6 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -169,7 +169,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, } } - err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr, + err = rxe_queue_resize(q, &attr->max_wr, rcv_wqe_size(srq->rq.max_sge), srq->rq.queue->ip ? srq->rq.queue->ip->context : -- cgit v1.2.3 From 6e9bb530ff8b07b3bbce79c937fc9dcb32da4eb9 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:17 -0500 Subject: IB/rxe: Remove buffer used for printing IP address Avoid smashing the stack when an ICRC error occurs on an IPv6 network. Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_recv.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 46f062842a9a..252b4d637d45 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -391,16 +391,15 @@ int rxe_rcv(struct sk_buff *skb) payload_size(pkt)); calc_icrc = cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { - char saddr[sizeof(struct in6_addr)]; - if (skb->protocol == htons(ETH_P_IPV6)) - sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr); + pr_warn_ratelimited("bad ICRC from %pI6c\n", + &ipv6_hdr(skb)->saddr); else if (skb->protocol == htons(ETH_P_IP)) - sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr); + pr_warn_ratelimited("bad ICRC from %pI4\n", + &ip_hdr(skb)->saddr); else - sprintf(saddr, "unknown"); + pr_warn_ratelimited("bad ICRC from unknown\n"); - pr_warn_ratelimited("bad ICRC from %s\n", saddr); goto drop; } -- cgit v1.2.3 From dd753d87436ce7ef2a958d684b38d5acc99c9f5c Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:18 -0500 Subject: IB/rxe: Advance the consumer pointer before posting the CQE A simple userspace application might poll the CQ, find a completion, and then attempt to post a new WQE to the SQ. A spurious error can occur if the userspace application detects a full SQ in the instant before the kernel is able to advance the SQ consumer pointer. This is noticeable when using single-entry SQs with ibv_rc_pingpong if lots of kernel and userspace library debugging is enabled. Signed-off-by: Andrew Boyer Reviewed-by: Yonatan Cohen Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_comp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 6c5e29db88e3..d46c49b33b13 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -420,11 +420,12 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) (wqe->wr.send_flags & IB_SEND_SIGNALED) || (qp->req.state == QP_STATE_ERROR)) { make_send_cqe(qp, wqe, &cqe); + advance_consumer(qp->sq.queue); rxe_cq_post(qp->scq, &cqe, 0); + } else { + advance_consumer(qp->sq.queue); } - advance_consumer(qp->sq.queue); - /* * we completed something so let req run again * if it is trying to fence -- cgit v1.2.3 From 2a7a85487e5432424eef7a394ed26ef1d8f0d192 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:19 -0500 Subject: IB/rxe: Don't update the response PSN unless it's going forwards A client might post a read followed by a send. The partner receives and acknowledges both transactions, posting an RCQ entry for the send, but something goes wrong with the read ACK. When the client retries the read, the partner's responder processes the duplicate read but incorrectly resets the PSN to the value preceding the original send. When the duplicate send arrives, the responder cannot tell that it is a duplicate, so the responder generates a duplicate RCQ entry, confusing the client. Signed-off-by: Andrew Boyer Reviewed-by: Yonatan Cohen Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index dd3d88adc003..cb3fd4cb0daa 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -742,7 +742,8 @@ static enum resp_states read_reply(struct rxe_qp *qp, } else { qp->resp.res = NULL; qp->resp.opcode = -1; - qp->resp.psn = res->cur_psn; + if (psn_compare(res->cur_psn, qp->resp.psn) >= 0) + qp->resp.psn = res->cur_psn; state = RESPST_CLEANUP; } -- cgit v1.2.3 From d38eb801aa145aedf4b97e8e0bb2e65763aa6149 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:20 -0500 Subject: IB/rxe: Unblock loopback by moving skb_out increment skb_out is decremented in rxe_skb_tx_dtor(), which is not called in the loopback() path. Move the increment to the send() path rather than rxe_xmit_packet(). Signed-off-by: Andrew Boyer Acked-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_loc.h | 2 -- drivers/infiniband/sw/rxe/rxe_net.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 73849a5a91b3..efe4c6a35442 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -266,8 +266,6 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, return err; } - atomic_inc(&qp->skb_out); - if ((qp_type(qp) != IB_QPT_RC) && (pkt->mask & RXE_END_MASK)) { pkt->wqe->state = wqe_state_done; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 4cb63780fa74..a576603304f7 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -459,6 +459,8 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return -EAGAIN; } + if (pkt->qp) + atomic_inc(&pkt->qp->skb_out); kfree_skb(skb); return 0; -- cgit v1.2.3 From d4fb59256ac03d84f68e36c430b58d6fc76dd651 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:21 -0500 Subject: IB/rxe: Add support for zero-byte operations The last_psn algorithm fails in the zero-byte case: it calculates first_psn = N, last_psn = N-1. This makes the operation unretryable since the res structure will fail the (first_psn <= psn <= last_psn) test in find_resource(). While here, use BTH_PSN_MASK to mask the calculated last_psn. Signed-off-by: Andrew Boyer Reviewed-by: Moni Shoua Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_mr.c | 3 +++ drivers/infiniband/sw/rxe/rxe_resp.c | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 1869152f1d23..d0faca294006 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -355,6 +355,9 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, size_t offset; u32 crc = crcp ? (*crcp) : 0; + if (length == 0) + return 0; + if (mem->type == RXE_MEM_TYPE_DMA) { u8 *src, *dest; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index cb3fd4cb0daa..a5e9ce34171b 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -444,6 +444,13 @@ static enum resp_states check_rkey(struct rxe_qp *qp, return RESPST_EXECUTE; } + /* A zero-byte op is not required to set an addr or rkey. */ + if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) && + (pkt->mask & RXE_RETH_MASK) && + reth_len(pkt) == 0) { + return RESPST_EXECUTE; + } + va = qp->resp.va; rkey = qp->resp.rkey; resid = qp->resp.resid; @@ -680,9 +687,14 @@ static enum resp_states read_reply(struct rxe_qp *qp, res->read.va_org = qp->resp.va; res->first_psn = req_pkt->psn; - res->last_psn = req_pkt->psn + - (reth_len(req_pkt) + mtu - 1) / - mtu - 1; + + if (reth_len(req_pkt)) { + res->last_psn = (req_pkt->psn + + (reth_len(req_pkt) + mtu - 1) / + mtu - 1) & BTH_PSN_MASK; + } else { + res->last_psn = res->first_psn; + } res->cur_psn = req_pkt->psn; res->read.resid = qp->resp.resid; -- cgit v1.2.3 From accacb8f51c299965939ac56926d1c718e2691a1 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:22 -0500 Subject: IB/rxe: Add support for IB_CQ_REPORT_MISSED_EVENTS Peek at the CQ after arming it so that we can return a hint. This avoids missed completions due to a race between posting CQEs and arming the CQ. For example, CM teardown waits on MAD requests to complete with ib_cq_poll_work(). Without this fix, the last completion might be left on the CQ, hanging the kthread doing the teardown. The console backtraces look like this: [ 4199.911284] Call Trace: [ 4199.911401] [] schedule+0x35/0x80 [ 4199.911556] [] schedule_timeout+0x22f/0x2c0 [ 4199.911727] [] ? __schedule+0x368/0xa20 [ 4199.911891] [] wait_for_completion+0xb3/0x130 [ 4199.912067] [] ? wake_up_q+0x70/0x70 [ 4199.912243] [] cm_destroy_id+0x13d/0x450 [ib_cm] [ 4199.912422] [] ? printk+0x57/0x73 [ 4199.912578] [] ib_destroy_cm_id+0x10/0x20 [ib_cm] [ 4199.912759] [] rdma_destroy_id+0xac/0x340 [rdma_cm] [ 4199.912941] [] 0xffffffffc076f2cc Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 19841c863daf..de39b0a081de 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1007,11 +1007,19 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct rxe_cq *cq = to_rcq(ibcq); + unsigned long irq_flags; + int ret = 0; + spin_lock_irqsave(&cq->cq_lock, irq_flags); if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; - return 0; + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue)) + ret = 1; + + spin_unlock_irqrestore(&cq->cq_lock, irq_flags); + + return ret; } static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) -- cgit v1.2.3 From 5b9ea16c5488e577b26cc198ac63550b746ce3b9 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:23 -0500 Subject: IB/rxe: Fix ref leak in rxe_create_qp() The udata->inlen error path needs to clean up the ref added by rxe_alloc(). Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_verbs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index de39b0a081de..071430c594b7 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -564,7 +564,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, if (udata) { if (udata->inlen) { err = -EINVAL; - goto err1; + goto err2; } qp->is_user = 1; } @@ -573,12 +573,13 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd); if (err) - goto err2; + goto err3; return &qp->ibqp; -err2: +err3: rxe_drop_index(qp); +err2: rxe_drop_ref(qp); err1: return ERR_PTR(err); -- cgit v1.2.3 From 5407f530122aa63cf304eb0874c938b3bdb8d3fb Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Wed, 23 Nov 2016 12:39:24 -0500 Subject: IB/rxe: Fix ref leak in duplicate_request() A ref was added after the call to skb_clone(). Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_resp.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index a5e9ce34171b..8643797fb530 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1145,6 +1145,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, pkt, skb_copy); if (rc) { pr_err("Failed resending result. This flow is not handled - skb ignored\n"); + rxe_drop_ref(qp); kfree_skb(skb_copy); rc = RESPST_CLEANUP; goto out; -- cgit v1.2.3 From 07bf9627d5f1c0334fc543a5435a31a3b5907944 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 5 Dec 2016 08:43:20 -0500 Subject: IB/rxe: Wait for tasklets to finish before tearing down QP The system may crash when a malformed request is received and the error is detected by the responder. NodeA: $ ibv_rc_pingpong -g 0 -d rxe0 -i 1 -n 1 -s 50000 NodeB: $ ibv_rc_pingpong -g 0 -d rxe0 -i 1 -n 1 -s 1024 The responder generates a receive error on node B since the incoming SEND is oversized. If the client tears down the QP before the responder or the completer finish running, a page fault may occur. The fix makes the destroy operation spin until the tasks complete, which appears to be original intent of the design. Signed-off-by: Andrew Boyer Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_task.c | 19 +++++++++++++++++++ drivers/infiniband/sw/rxe/rxe_task.h | 1 + 2 files changed, 20 insertions(+) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index 1e19bf828a6e..d2a14a1bdc7f 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -121,6 +121,7 @@ int rxe_init_task(void *obj, struct rxe_task *task, task->arg = arg; task->func = func; snprintf(task->name, sizeof(task->name), "%s", name); + task->destroyed = false; tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); @@ -132,11 +133,29 @@ int rxe_init_task(void *obj, struct rxe_task *task, void rxe_cleanup_task(struct rxe_task *task) { + unsigned long flags; + bool idle; + + /* + * Mark the task, then wait for it to finish. It might be + * running in a non-tasklet (direct call) context. + */ + task->destroyed = true; + + do { + spin_lock_irqsave(&task->state_lock, flags); + idle = (task->state == TASK_STATE_START); + spin_unlock_irqrestore(&task->state_lock, flags); + } while (!idle); + tasklet_kill(&task->tasklet); } void rxe_run_task(struct rxe_task *task, int sched) { + if (task->destroyed) + return; + if (sched) tasklet_schedule(&task->tasklet); else diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index d14aa6daed05..08ff42d451c6 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -54,6 +54,7 @@ struct rxe_task { int (*func)(void *arg); int ret; char name[16]; + bool destroyed; }; /* -- cgit v1.2.3 From 37f69f43fb5aba4288d38ea32bbe0dfdb412c763 Mon Sep 17 00:00:00 2001 From: Andrew Boyer Date: Mon, 5 Dec 2016 08:43:21 -0500 Subject: IB/rxe: Hold refs when running tasklets It might be possible for all of a QP's references to be dropped while one of that QP's tasklets is running. For example, the completer might run during QP destroy. If qp->valid is false, it will drop all of the packets on the resp_pkts list, potentially removing the last reference. Then it tries to advance the SQ consumer pointer. If the SQ's buffer has already been destroyed, the system will panic. To be safe, hold a reference on the QP for the duration of each tasklet. Signed-off-by: Andrew Boyer Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++++ drivers/infiniband/sw/rxe/rxe_req.c | 5 ++++- drivers/infiniband/sw/rxe/rxe_resp.c | 3 +++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index d46c49b33b13..cd27cbde7652 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -511,6 +511,8 @@ int rxe_completer(void *arg) struct rxe_pkt_info *pkt = NULL; enum comp_state state; + rxe_add_ref(qp); + if (!qp->valid) { while ((skb = skb_dequeue(&qp->resp_pkts))) { rxe_drop_ref(qp); @@ -740,11 +742,13 @@ exit: /* we come here if we are done with processing and want the task to * exit from the loop calling us */ + rxe_drop_ref(qp); return -EAGAIN; done: /* we come here if we have processed a packet we want the task to call * us again to see if there is anything else to do */ + rxe_drop_ref(qp); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 205222909e53..b246653cf713 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -596,6 +596,8 @@ int rxe_requester(void *arg) struct rxe_send_wqe rollback_wqe; u32 rollback_psn; + rxe_add_ref(qp); + next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) goto exit; @@ -750,9 +752,10 @@ complete: while (rxe_completer(qp) == 0) ; } - + rxe_drop_ref(qp); return 0; exit: + rxe_drop_ref(qp); return -EAGAIN; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 8643797fb530..7a36ec9dbc0c 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1212,6 +1212,8 @@ int rxe_responder(void *arg) struct rxe_pkt_info *pkt = NULL; int ret = 0; + rxe_add_ref(qp); + qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; if (!qp->valid) { @@ -1400,5 +1402,6 @@ int rxe_responder(void *arg) exit: ret = -EAGAIN; done: + rxe_drop_ref(qp); return ret; } -- cgit v1.2.3 From d680ebed91e0b45c43ae03a880a0b43211096161 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:16 +0200 Subject: IB/rxe: Increase max number of completions to 32k Increase limit of max CQE from 8K to 32K to allow demanding applications to work over SoftRoCE with same configuration as most RoCEv2 HW vendors have. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f459c43a77c8..13ed2cc6eaa2 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -82,7 +82,7 @@ enum rxe_device_param { RXE_MAX_SGE = 32, RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, - RXE_MAX_LOG_CQE = 13, + RXE_MAX_LOG_CQE = 15, RXE_MAX_MR = 2 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, -- cgit v1.2.3 From 477864c8fcd953e5a988073ca5be18bb7fd93410 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Nov 2016 08:23:24 +0200 Subject: IB/core: Let create_ah return extended response to user Add struct ib_udata to the signature of create_ah callback that is implemented by IB device drivers. This allows HW drivers to return extra data to the userspace library. This patch prepares the ground for mlx5 driver to resolve destination mac address for a given GID and return it to userspace. This patch was previously submitted by Knut Omang as a part of the patch set to support Oracle's Infiniband HCA (SIF). Signed-off-by: Knut Omang Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 11 ++++++++++- drivers/infiniband/core/verbs.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 3 ++- drivers/infiniband/hw/cxgb4/provider.c | 4 +++- drivers/infiniband/hw/hns/hns_roce_ah.c | 3 ++- drivers/infiniband/hw/hns/hns_roce_device.h | 3 ++- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 4 +++- drivers/infiniband/hw/mlx4/ah.c | 4 +++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 ++- drivers/infiniband/hw/mlx5/ah.c | 4 +++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- drivers/infiniband/hw/mthca/mthca_provider.c | 4 +++- drivers/infiniband/hw/nes/nes_verbs.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 4 +++- drivers/infiniband/hw/qedr/verbs.c | 3 ++- drivers/infiniband/hw/qedr/verbs.h | 3 ++- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 4 +++- drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 4 +++- drivers/infiniband/sw/rxe/rxe_verbs.c | 4 +++- include/rdma/ib_verbs.h | 3 ++- 21 files changed, 58 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 77bb15f66414..cdb935ddab69 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2877,6 +2877,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_ah *ah; struct ib_ah_attr attr; int ret; + struct ib_udata udata; if (out_len < sizeof resp) return -ENOSPC; @@ -2884,6 +2885,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -2910,12 +2915,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - ah = ib_create_ah(pd, &attr); + ah = pd->device->create_ah(pd, &attr, &udata); + if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } + ah->device = pd->device; + ah->pd = pd; + atomic_inc(&pd->usecnt); ah->uobject = uobj; uobj->object = ah; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index cacee169cd55..c976f29f7ad2 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -315,7 +315,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct ib_ah *ah; - ah = pd->device->create_ah(pd, ah_attr); + ah = pd->device->create_ah(pd, ah_attr, NULL); if (!IS_ERR(ah)) { ah->device = pd->device; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index cba57bb53dba..9d5fe1853da4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -62,7 +62,8 @@ #include "common.h" static struct ib_ah *iwch_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 645e606a17c5..49b51b7e0fd7 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -59,7 +59,9 @@ module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 24f79ee39fdf..0ac294db3b29 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,7 +39,8 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr) +struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); struct device *dev = &hr_dev->pdev->dev; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 341731553a60..470615f7b517 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -667,7 +667,8 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, unsigned long obj, int cnt); -struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int hns_roce_destroy_ah(struct ib_ah *ah); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 6329c971c22f..f03fc1527d70 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2562,7 +2562,9 @@ static int i40iw_query_pkey(struct ib_device *ibdev, * @ah_attr: address handle attributes */ static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd, - struct ib_ah_attr *attr) + struct ib_ah_attr *attr, + struct ib_udata *udata) + { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 6be7dc320ff7..20c6d17ac8b8 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -126,7 +126,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr return &ah->ibah; } -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { struct mlx4_ib_ah *ah; struct ib_ah *ret; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 35141f451e5c..7f3d976d81ed 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -742,7 +742,8 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 745efa4cfc71..ecac9ea2c85f 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -64,7 +64,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, return &ah->ibah; } -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { struct mlx5_ib_ah *ah; struct mlx5_ib_dev *dev = to_mdev(pd->device); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 95937e7d2584..ff05afa864ee 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -739,7 +739,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx5_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 358930a41e36..d31708742ba5 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -410,7 +410,9 @@ static int mthca_dealloc_pd(struct ib_pd *pd) } static struct ib_ah *mthca_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { int err; struct mthca_ah *ah; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index bd69125731c1..0bb857c7e439 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -771,7 +771,8 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) /** * nes_create_ah */ -static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 797362a297b2..14d33b0f3950 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -154,7 +154,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) { u32 *ahid_addr; int status; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 3856dd4c7e3d..0704a24b17c8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -50,7 +50,9 @@ enum { OCRDMA_AH_L3_TYPE_MASK = 0x03, OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); + +struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *, + struct ib_udata *); int ocrdma_destroy_ah(struct ib_ah *); int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index a61514296767..ccff6c6e3f33 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2094,7 +2094,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp) return rc; } -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) { struct qedr_ah *ah; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index a9b5e67bb81e..070677ca4d19 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,7 +70,8 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp); -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr); +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata); int qedr_destroy_ah(struct ib_ah *ibah); int qedr_dereg_mr(struct ib_mr *); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a5bfbba6bbac..fd2a50eb4c91 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -738,7 +738,9 @@ int usnic_ib_mmap(struct ib_ucontext *context, /* In ib callbacks section - Start of stub funcs */ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { usnic_dbg("\n"); return ERR_PTR(-EPERM); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 0d9d2e6a14d5..0ed8e072329e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -75,7 +75,9 @@ int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr); + struct ib_ah_attr *ah_attr, + struct ib_udata *udata); + int usnic_ib_destroy_ah(struct ib_ah *ah); int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 19841c863daf..187d85ccfe58 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -316,7 +316,9 @@ static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr, return err; } -static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) + { int err; struct rxe_dev *rxe = to_rdev(ibpd->device); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0c6f973e407c..73417a22ee4d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1951,7 +1951,8 @@ struct ib_device { struct ib_udata *udata); int (*dealloc_pd)(struct ib_pd *pd); struct ib_ah * (*create_ah)(struct ib_pd *pd, - struct ib_ah_attr *ah_attr); + struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, -- cgit v1.2.3 From 6efaf10f163d9a60d1d4b2a049b194a53537ba1b Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 19 Oct 2016 14:07:19 +0200 Subject: IB/rdmavt: Avoid queuing work into a destroyed cq kthread worker The memory barrier is not enough to protect queuing works into a destroyed cq kthread. Just imagine the following situation: CPU1 CPU2 rvt_cq_enter() worker = cq->rdi->worker; rvt_cq_exit() rdi->worker = NULL; smp_wmb(); kthread_flush_worker(worker); kthread_stop(worker->task); kfree(worker); // nothing queued yet => // nothing flushed and // happily stopped and freed if (likely(worker)) { // true => read before CPU2 acted cq->notify = RVT_CQ_NONE; cq->triggered++; kthread_queue_work(worker, &cq->comptask); BANG: worker has been flushed/stopped/freed in the meantime. This patch solves this by protecting the critical sections by rdi->n_cqs_lock. It seems that this lock is not much contended and looks reasonable for this purpose. One catch is that rvt_cq_enter() might be called from IRQ context. Therefore we must always take the lock with IRQs disabled to avoid a possible deadlock. Signed-off-by: Petr Mladek Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 6d9904a4a0ab..223ec4589fc7 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && (solicited || entry->status != IB_WC_SUCCESS))) { - struct kthread_worker *worker; /* * This will cause send_complete() to be called in * another thread. */ - smp_read_barrier_depends(); /* see rvt_cq_exit */ - worker = cq->rdi->worker; - if (likely(worker)) { + spin_lock(&cq->rdi->n_cqs_lock); + if (likely(cq->rdi->worker)) { cq->notify = RVT_CQ_NONE; cq->triggered++; - kthread_queue_work(worker, &cq->comptask); + kthread_queue_work(cq->rdi->worker, &cq->comptask); } + spin_unlock(&cq->rdi->n_cqs_lock); } spin_unlock_irqrestore(&cq->lock, flags); @@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, } } - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); ret = ERR_PTR(-ENOMEM); goto bail_ip; } rdi->n_cqs_allocated++; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) { spin_lock_irq(&rdi->pending_lock); @@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq) struct rvt_dev_info *rdi = cq->rdi; kthread_flush_work(&cq->comptask); - spin_lock(&rdi->n_cqs_lock); + spin_lock_irq(&rdi->n_cqs_lock); rdi->n_cqs_allocated--; - spin_unlock(&rdi->n_cqs_lock); + spin_unlock_irq(&rdi->n_cqs_lock); if (cq->ip) kref_put(&cq->ip->ref, rvt_release_mmap_info); else @@ -541,12 +540,15 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) { struct kthread_worker *worker; - worker = rdi->worker; - if (!worker) + /* block future queuing from send_complete() */ + spin_lock_irq(&rdi->n_cqs_lock); + if (!rdi->worker) { + spin_unlock_irq(&rdi->n_cqs_lock); return; - /* blocks future queuing from send_complete() */ + } rdi->worker = NULL; - smp_wmb(); /* See rdi_cq_enter */ + spin_unlock_irq(&rdi->n_cqs_lock); + kthread_flush_worker(worker); kthread_stop(worker->task); kfree(worker); -- cgit v1.2.3 From f5eabf5e5129e8ab5b3e7f50b24444aca1680e64 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 19 Oct 2016 14:07:20 +0200 Subject: IB/rdmavt: Handle the kthread worker using the new API Use the new API to create and destroy the cq kthread worker. The API hides some implementation details. In particular, kthread_create_worker() allocates and initializes struct kthread_worker. It runs the kthread the right way and stores task_struct into the worker structure. In addition, the *on_cpu() variant binds the kthread to the given cpu and the related memory node. kthread_destroy_worker() flushes all pending works, stops the kthread and frees the structure. This patch does not change the existing behavior. Note that we must use the on_cpu() variant because the function starts the kthread and it must bind it to the right CPU before waking. The numa node is associated for given CPU as well. Signed-off-by: Petr Mladek Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 223ec4589fc7..4d0b6992e847 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -503,33 +503,23 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { - int ret = 0; int cpu; - struct task_struct *task; + struct kthread_worker *worker; if (rdi->worker) return 0; + spin_lock_init(&rdi->n_cqs_lock); - rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); - if (!rdi->worker) - return -ENOMEM; - kthread_init_worker(rdi->worker); - task = kthread_create_on_node( - kthread_worker_fn, - rdi->worker, - rdi->dparms.node, - "%s", rdi->dparms.cq_name); - if (IS_ERR(task)) { - kfree(rdi->worker); - rdi->worker = NULL; - return PTR_ERR(task); - } - set_user_nice(task, MIN_NICE); cpu = cpumask_first(cpumask_of_node(rdi->dparms.node)); - kthread_bind(task, cpu); - wake_up_process(task); - return ret; + worker = kthread_create_worker_on_cpu(cpu, 0, + "%s", rdi->dparms.cq_name); + if (IS_ERR(worker)) + return PTR_ERR(worker); + + set_user_nice(worker->task, MIN_NICE); + rdi->worker = worker; + return 0; } /** @@ -549,7 +539,5 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) rdi->worker = NULL; spin_unlock_irq(&rdi->n_cqs_lock); - kthread_flush_worker(worker); - kthread_stop(worker->task); - kfree(worker); + kthread_destroy_worker(worker); } -- cgit v1.2.3 From 22dccc5454a39427de7b87a080d026b6bf66a7b9 Mon Sep 17 00:00:00 2001 From: Jim Foraker Date: Tue, 1 Nov 2016 13:44:12 -0700 Subject: IB/rdmavt: Only put mmap_info ref if it exists rvt_create_qp() creates qp->ip only when a qp creation request comes from userspace (udata is not NULL). If we exceed the number of available queue pairs however, the error path always attempts to put a kref to this structure. If the requestor is inside the kernel, this leads to a crash. We fix this by checking that qp->ip is not NULL before caling kref_put(). Signed-off-by: Jim Foraker Acked-by: Dennis Dalessandro Acked-by: Jonathan Toppins Acked-by: Alex Estrin Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 9e14addd690c..2a13ac660f2b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -901,7 +901,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, return ret; bail_ip: - kref_put(&qp->ip->ref, rvt_release_mmap_info); + if (qp->ip) + kref_put(&qp->ip->ref, rvt_release_mmap_info); bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); -- cgit v1.2.3