summary | refs | log | tree | commit | diff
path: root/drivers/infiniband/sw/rxe
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-09-02 02:49:33 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-09-02 02:49:33 +0300
commit: f7e97ce26972ae7be8bbbae8d819ff311d4c5900 (patch)
tree: 6750e2bab2c7b3fafc30d9bd2cbe2be7645c7ac5 /drivers/infiniband/sw/rxe
parent: 2fcbb03847d89155d7b33d75ffee3a6bc5c51c97 (diff)
parent: f5acc36b0714b7b8510a8b436087d33a65cb05f4 (diff)
download: linux-f7e97ce26972ae7be8bbbae8d819ff311d4c5900.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
 "Many small changes across the subsystem, some highlights:

   - Usual driver cleanups in qedr, siw, erdma, hfi1, mlx4/5, irdma, mthca, hns, and bnxt_re

   - siw now works over tunnel and other netdevs with a MAC address by removing assumptions about a MAC/GID from the connection manager

   - "Doorbell Pacing" for bnxt_re - this is a best effort scheme to allow userspace to slow down the doorbell rings if the HW gets full

   - irdma egress VLAN priority, better QP/WQ sizing

   - rxe bug fixes in queue draining and srq resizing

   - Support more ethernet speed options in the core layer

   - DMABUF support for bnxt_re

   - Multi-stage MTT support for erdma to allow much bigger MR registrations

   - An irdma fix with a CVE that came in too late to go to -rc, missing bounds checking for 0 length MRs"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (87 commits)
  IB/hfi1: Reduce printing of errors during driver shut down
  RDMA/hfi1: Move user SDMA system memory pinning code to its own file
  RDMA/hfi1: Use list_for_each_entry() helper
  RDMA/mlx5: Fix trailing */ formatting in block comment
  RDMA/rxe: Fix redundant break statement in switch-case.
  RDMA/efa: Fix wrong resources deallocation order
  RDMA/siw: Call llist_reverse_order in siw_run_sq
  RDMA/siw: Correct wrong debug message
  RDMA/siw: Balance the reference of cep->kref in the error path
  Revert "IB/isert: Fix incorrect release of isert connection"
  RDMA/bnxt_re: Fix kernel doc errors
  RDMA/irdma: Prevent zero-length STAG registration
  RDMA/erdma: Implement hierarchical MTT
  RDMA/erdma: Refactor the storage structure of MTT entries
  RDMA/erdma: Renaming variable names and field names of struct erdma_mem
  RDMA/hns: Support hns HW stats
  RDMA/hns: Dump whole QP/CQ/MR resource in raw
  RDMA/irdma: Add missing kernel-doc in irdma_setup_umode_qp()
  RDMA/mlx4: Copy union directly
  RDMA/irdma: Drop unused kernel push code
  ...
Diffstat (limited to 'drivers/infiniband/sw/rxe')
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c     4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h      6
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c     159
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c     45
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c     4
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_srq.c     60
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c    1
7 files changed, 177 insertions, 102 deletions
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 5111735aafae..d0bdc2d8adc8 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -597,6 +597,10 @@ static void flush_send_queue(struct rxe_qp *qp, bool notify)
struct rxe_queue *q = qp->sq.queue;
int err;
+ /* send queue never got created. nothing to do. */
+ if (!qp->sq.queue)
+ return;
+
while ((wqe = queue_head(q, q->type))) {
if (notify) {
err = flush_send_wqe(qp, wqe);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 666e06a82bc9..4d2a8ef52c85 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -136,12 +136,6 @@ static inline int qp_mtu(struct rxe_qp *qp)
return IB_MTU_4096;
}
-static inline int rcv_wqe_size(int max_sge)
-{
- return sizeof(struct rxe_recv_wqe) +
- max_sge * sizeof(struct ib_sge);
-}
-
void free_rd_atomic_resource(struct resp_res *res);
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a569b111a9d2..28e379c108bc 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -183,13 +183,63 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
atomic_set(&qp->skb_out, 0);
}
+static int rxe_init_sq(struct rxe_qp *qp, struct ib_qp_init_attr *init,
+ struct ib_udata *udata,
+ struct rxe_create_qp_resp __user *uresp)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int wqe_size;
+ int err;
+
+ qp->sq.max_wr = init->cap.max_send_wr;
+ wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge),
+ init->cap.max_inline_data);
+ qp->sq.max_sge = wqe_size / sizeof(struct ib_sge);
+ qp->sq.max_inline = wqe_size;
+ wqe_size += sizeof(struct rxe_send_wqe);
+
+ qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size,
+ QUEUE_TYPE_FROM_CLIENT);
+ if (!qp->sq.queue) {
+ rxe_err_qp(qp, "Unable to allocate send queue");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ /* prepare info for caller to mmap send queue if user space qp */
+ err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata,
+ qp->sq.queue->buf, qp->sq.queue->buf_size,
+ &qp->sq.queue->ip);
+ if (err) {
+ rxe_err_qp(qp, "do_mmap_info failed, err = %d", err);
+ goto err_free;
+ }
+
+ /* return actual capabilities to caller which may be larger
+ * than requested
+ */
+ init->cap.max_send_wr = qp->sq.max_wr;
+ init->cap.max_send_sge = qp->sq.max_sge;
+ init->cap.max_inline_data = qp->sq.max_inline;
+
+ return 0;
+
+err_free:
+ vfree(qp->sq.queue->buf);
+ kfree(qp->sq.queue);
+ qp->sq.queue = NULL;
+err_out:
+ return err;
+}
+
static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_init_attr *init, struct ib_udata *udata,
struct rxe_create_qp_resp __user *uresp)
{
int err;
- int wqe_size;
- enum queue_type type;
+
+ /* if we don't finish qp create make sure queue is valid */
+ skb_queue_head_init(&qp->req_pkts);
err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk);
if (err < 0)
@@ -204,32 +254,10 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
* (0xc000 - 0xffff).
*/
qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff);
- qp->sq.max_wr = init->cap.max_send_wr;
-
- /* These caps are limited by rxe_qp_chk_cap() done by the caller */
- wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge),
- init->cap.max_inline_data);
- qp->sq.max_sge = init->cap.max_send_sge =
- wqe_size / sizeof(struct ib_sge);
- qp->sq.max_inline = init->cap.max_inline_data = wqe_size;
- wqe_size += sizeof(struct rxe_send_wqe);
- type = QUEUE_TYPE_FROM_CLIENT;
- qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr,
- wqe_size, type);
- if (!qp->sq.queue)
- return -ENOMEM;
-
- err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata,
- qp->sq.queue->buf, qp->sq.queue->buf_size,
- &qp->sq.queue->ip);
-
- if (err) {
- vfree(qp->sq.queue->buf);
- kfree(qp->sq.queue);
- qp->sq.queue = NULL;
+ err = rxe_init_sq(qp, init, udata, uresp);
+ if (err)
return err;
- }
qp->req.wqe_index = queue_get_producer(qp->sq.queue,
QUEUE_TYPE_FROM_CLIENT);
@@ -248,36 +276,65 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
return 0;
}
+static int rxe_init_rq(struct rxe_qp *qp, struct ib_qp_init_attr *init,
+ struct ib_udata *udata,
+ struct rxe_create_qp_resp __user *uresp)
+{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ int wqe_size;
+ int err;
+
+ qp->rq.max_wr = init->cap.max_recv_wr;
+ qp->rq.max_sge = init->cap.max_recv_sge;
+ wqe_size = sizeof(struct rxe_recv_wqe) +
+ qp->rq.max_sge*sizeof(struct ib_sge);
+
+ qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size,
+ QUEUE_TYPE_FROM_CLIENT);
+ if (!qp->rq.queue) {
+ rxe_err_qp(qp, "Unable to allocate recv queue");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ /* prepare info for caller to mmap recv queue if user space qp */
+ err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata,
+ qp->rq.queue->buf, qp->rq.queue->buf_size,
+ &qp->rq.queue->ip);
+ if (err) {
+ rxe_err_qp(qp, "do_mmap_info failed, err = %d", err);
+ goto err_free;
+ }
+
+ /* return actual capabilities to caller which may be larger
+ * than requested
+ */
+ init->cap.max_recv_wr = qp->rq.max_wr;
+
+ return 0;
+
+err_free:
+ vfree(qp->rq.queue->buf);
+ kfree(qp->rq.queue);
+ qp->rq.queue = NULL;
+err_out:
+ return err;
+}
+
static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
struct ib_qp_init_attr *init,
struct ib_udata *udata,
struct rxe_create_qp_resp __user *uresp)
{
int err;
- int wqe_size;
- enum queue_type type;
+
+ /* if we don't finish qp create make sure queue is valid */
+ skb_queue_head_init(&qp->resp_pkts);
if (!qp->srq) {
- qp->rq.max_wr = init->cap.max_recv_wr;
- qp->rq.max_sge = init->cap.max_recv_sge;
-
- wqe_size = rcv_wqe_size(qp->rq.max_sge);
-
- type = QUEUE_TYPE_FROM_CLIENT;
- qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
- wqe_size, type);
- if (!qp->rq.queue)
- return -ENOMEM;
-
- err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata,
- qp->rq.queue->buf, qp->rq.queue->buf_size,
- &qp->rq.queue->ip);
- if (err) {
- vfree(qp->rq.queue->buf);
- kfree(qp->rq.queue);
- qp->rq.queue = NULL;
+ err = rxe_init_rq(qp, init, udata, uresp);
+ if (err)
return err;
- }
}
rxe_init_task(&qp->resp.task, qp, rxe_responder);
@@ -307,10 +364,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
if (srq)
rxe_get(srq);
- qp->pd = pd;
- qp->rcq = rcq;
- qp->scq = scq;
- qp->srq = srq;
+ qp->pd = pd;
+ qp->rcq = rcq;
+ qp->scq = scq;
+ qp->srq = srq;
atomic_inc(&rcq->num_wq);
atomic_inc(&scq->num_wq);
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 2171f19494bc..d8c41fd626a9 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -578,10 +578,11 @@ static void save_state(struct rxe_send_wqe *wqe,
struct rxe_send_wqe *rollback_wqe,
u32 *rollback_psn)
{
- rollback_wqe->state = wqe->state;
+ rollback_wqe->state = wqe->state;
rollback_wqe->first_psn = wqe->first_psn;
- rollback_wqe->last_psn = wqe->last_psn;
- *rollback_psn = qp->req.psn;
+ rollback_wqe->last_psn = wqe->last_psn;
+ rollback_wqe->dma = wqe->dma;
+ *rollback_psn = qp->req.psn;
}
static void rollback_state(struct rxe_send_wqe *wqe,
@@ -589,10 +590,11 @@ static void rollback_state(struct rxe_send_wqe *wqe,
struct rxe_send_wqe *rollback_wqe,
u32 rollback_psn)
{
- wqe->state = rollback_wqe->state;
+ wqe->state = rollback_wqe->state;
wqe->first_psn = rollback_wqe->first_psn;
- wqe->last_psn = rollback_wqe->last_psn;
- qp->req.psn = rollback_psn;
+ wqe->last_psn = rollback_wqe->last_psn;
+ wqe->dma = rollback_wqe->dma;
+ qp->req.psn = rollback_psn;
}
static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
@@ -797,6 +799,9 @@ int rxe_requester(struct rxe_qp *qp)
pkt.mask = rxe_opcode[opcode].mask;
pkt.wqe = wqe;
+ /* save wqe state before we build and send packet */
+ save_state(wqe, qp, &rollback_wqe, &rollback_psn);
+
av = rxe_get_av(&pkt, &ah);
if (unlikely(!av)) {
rxe_dbg_qp(qp, "Failed no address vector\n");
@@ -829,29 +834,29 @@ int rxe_requester(struct rxe_qp *qp)
if (ah)
rxe_put(ah);
- /*
- * To prevent a race on wqe access between requester and completer,
- * wqe members state and psn need to be set before calling
- * rxe_xmit_packet().
- * Otherwise, completer might initiate an unjustified retry flow.
- */
- save_state(wqe, qp, &rollback_wqe, &rollback_psn);
+ /* update wqe state as though we had sent it */
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
err = rxe_xmit_packet(qp, &pkt, skb);
if (err) {
- qp->need_req_skb = 1;
+ if (err != -EAGAIN) {
+ wqe->status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+ }
+ /* the packet was dropped so reset wqe to the state
+ * before we sent it so we can try to resend
+ */
rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
- if (err == -EAGAIN) {
- rxe_sched_task(&qp->req.task);
- goto exit;
- }
+ /* force a delay until the dropped packet is freed and
+ * the send queue is drained below the low water mark
+ */
+ qp->need_req_skb = 1;
- wqe->status = IB_WC_LOC_QP_OP_ERR;
- goto err;
+ rxe_sched_task(&qp->req.task);
+ goto exit;
}
update_state(qp, &pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 64c64f5f36a8..da470a925efc 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -1469,6 +1469,10 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify)
return;
}
+ /* recv queue not created. nothing to do. */
+ if (!qp->rq.queue)
+ return;
+
while ((wqe = queue_head(q, q->type))) {
if (notify) {
err = flush_recv_wqe(qp, wqe);
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 27ca82ec0826..3661cb627d28 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -45,40 +45,41 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_init_attr *init, struct ib_udata *udata,
struct rxe_create_srq_resp __user *uresp)
{
- int err;
- int srq_wqe_size;
struct rxe_queue *q;
- enum queue_type type;
+ int wqe_size;
+ int err;
- srq->ibsrq.event_handler = init->event_handler;
- srq->ibsrq.srq_context = init->srq_context;
- srq->limit = init->attr.srq_limit;
- srq->srq_num = srq->elem.index;
- srq->rq.max_wr = init->attr.max_wr;
- srq->rq.max_sge = init->attr.max_sge;
+ srq->ibsrq.event_handler = init->event_handler;
+ srq->ibsrq.srq_context = init->srq_context;
+ srq->limit = init->attr.srq_limit;
+ srq->srq_num = srq->elem.index;
+ srq->rq.max_wr = init->attr.max_wr;
+ srq->rq.max_sge = init->attr.max_sge;
- srq_wqe_size = rcv_wqe_size(srq->rq.max_sge);
+ wqe_size = sizeof(struct rxe_recv_wqe) +
+ srq->rq.max_sge*sizeof(struct ib_sge);
spin_lock_init(&srq->rq.producer_lock);
spin_lock_init(&srq->rq.consumer_lock);
- type = QUEUE_TYPE_FROM_CLIENT;
- q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type);
+ q = rxe_queue_init(rxe, &srq->rq.max_wr, wqe_size,
+ QUEUE_TYPE_FROM_CLIENT);
if (!q) {
rxe_dbg_srq(srq, "Unable to allocate queue\n");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto err_out;
}
- srq->rq.queue = q;
-
err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata, q->buf,
q->buf_size, &q->ip);
if (err) {
- vfree(q->buf);
- kfree(q);
- return err;
+ rxe_dbg_srq(srq, "Unable to init mmap info for caller\n");
+ goto err_free;
}
+ srq->rq.queue = q;
+ init->attr.max_wr = srq->rq.max_wr;
+
if (uresp) {
if (copy_to_user(&uresp->srq_num, &srq->srq_num,
sizeof(uresp->srq_num))) {
@@ -88,6 +89,12 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
}
return 0;
+
+err_free:
+ vfree(q->buf);
+ kfree(q);
+err_out:
+ return err;
}
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
@@ -145,9 +152,10 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata)
{
- int err;
struct rxe_queue *q = srq->rq.queue;
struct mminfo __user *mi = NULL;
+ int wqe_size;
+ int err;
if (mask & IB_SRQ_MAX_WR) {
/*
@@ -156,12 +164,16 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
*/
mi = u64_to_user_ptr(ucmd->mmap_info_addr);
- err = rxe_queue_resize(q, &attr->max_wr,
- rcv_wqe_size(srq->rq.max_sge), udata, mi,
- &srq->rq.producer_lock,
+ wqe_size = sizeof(struct rxe_recv_wqe) +
+ srq->rq.max_sge*sizeof(struct ib_sge);
+
+ err = rxe_queue_resize(q, &attr->max_wr, wqe_size,
+ udata, mi, &srq->rq.producer_lock,
&srq->rq.consumer_lock);
if (err)
- goto err2;
+ goto err_free;
+
+ srq->rq.max_wr = attr->max_wr;
}
if (mask & IB_SRQ_LIMIT)
@@ -169,7 +181,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
return 0;
-err2:
+err_free:
rxe_queue_cleanup(q);
srq->rq.queue = NULL;
return err;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 903f0b71447e..48f86839d36a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -798,7 +798,6 @@ static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
rxe_err_qp(qp, "unsupported wr opcode %d",
wr->opcode);
return -EINVAL;
- break;
}
}