diff options
Diffstat (limited to 'drivers/infiniband')
23 files changed, 121 insertions, 92 deletions
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 738d1faf4bba..417ebf4d8ba9 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -288,7 +288,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port) struct rdma_counter *counter; int ret; - if (!qp->res.valid) + if (!qp->res.valid || rdma_is_kernel_res(&qp->res)) return 0; if (!rdma_is_port_valid(dev, port)) @@ -483,7 +483,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u8 port, goto err; } - if (counter->res.task != qp->res.task) { + if (rdma_is_kernel_res(&counter->res) != rdma_is_kernel_res(&qp->res)) { ret = -EINVAL; goto err_task; } diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 905a2beaf885..eadba29432dd 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1339,6 +1339,10 @@ out: return ret; } +static void prevent_dealloc_device(struct ib_device *ib_dev) +{ +} + /** * ib_register_device - Register an IB device with IB core * @device: Device to register @@ -1409,11 +1413,11 @@ int ib_register_device(struct ib_device *device, const char *name) * possibility for a parallel unregistration along with this * error flow. Since we have a refcount here we know any * parallel flow is stopped in disable_device and will see the - * NULL pointers, causing the responsibility to + * special dealloc_driver pointer, causing the responsibility to * ib_dealloc_device() to revert back to this thread. */ dealloc_fn = device->ops.dealloc_driver; - device->ops.dealloc_driver = NULL; + device->ops.dealloc_driver = prevent_dealloc_device; ib_device_put(device); __ib_unregister_device(device); device->ops.dealloc_driver = dealloc_fn; @@ -1462,7 +1466,8 @@ static void __ib_unregister_device(struct ib_device *ib_dev) * Drivers using the new flow may not call ib_dealloc_device except * in error unwind prior to registration success. */ - if (ib_dev->ops.dealloc_driver) { + if (ib_dev->ops.dealloc_driver && + ib_dev->ops.dealloc_driver != prevent_dealloc_device) { WARN_ON(kref_read(&ib_dev->dev.kobj.kref) <= 1); ib_dealloc_device(ib_dev); } diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index e16105be2eb2..98cd6403ca60 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -738,9 +738,6 @@ static int fill_stat_counter_qps(struct sk_buff *msg, xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { qp = container_of(res, struct ib_qp, res); - if (qp->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) - continue; - if (!qp->counter || (qp->counter->id != counter->id)) continue; diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b48b3f6e632d..557644dcc923 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -770,6 +770,7 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->uobject = uobj; atomic_inc(&pd->usecnt); mr->res.type = RDMA_RESTRACK_MR; + mr->iova = cmd.hca_va; rdma_restrack_uadd(&mr->res); uobj->object = mr; @@ -861,6 +862,9 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs) atomic_dec(&old_pd->usecnt); } + if (cmd.flags & IB_MR_REREG_TRANS) + mr->iova = cmd.hca_va; + memset(&resp, 0, sizeof(resp)); resp.lkey = mr->lkey; resp.rkey = mr->rkey; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 53d6505c0c7b..f369f0a19e85 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1712,7 +1712,7 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, if (!(rdma_protocol_ib(qp->device, attr->alt_ah_attr.port_num) && rdma_protocol_ib(qp->device, port))) { - ret = EINVAL; + ret = -EINVAL; goto out; } } diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b12fbc857f94..5c41e13496a0 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -811,7 +811,8 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, struct ib_event event; unsigned int flags; - if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR) { + if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && + rdma_is_kernel_res(&qp->ib_qp.res)) { flags = bnxt_re_lock_cqs(qp); bnxt_qplib_add_flush_qp(&qp->qplib_qp); bnxt_re_unlock_cqs(qp, flags); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 962dc97a8ff2..1e4f4e525598 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -399,7 +399,6 @@ static int finish_mem_reg(struct c4iw_mr *mhp, u32 stag) mmid = stag >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; mhp->ibmr.length = mhp->attr.len; - mhp->ibmr.iova = mhp->attr.va_fbo; mhp->ibmr.page_size = 1U << (mhp->attr.page_size + 12); pr_debug("mmid 0x%x mhp %p\n", mmid, mhp); return xa_insert_irq(&mhp->rhp->mrs, mmid, mhp, GFP_KERNEL); diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index facff133139a..3ba299cfd0b5 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -3215,6 +3215,7 @@ bool hfi1_tid_rdma_wqe_interlock(struct rvt_qp *qp, struct rvt_swqe *wqe) case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: switch (prev->wr.opcode) { case IB_WR_TID_RDMA_WRITE: req = wqe_to_tid_req(prev); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 479fa557993e..c69453a62767 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -66,8 +66,6 @@ #define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 #define HNS_ROCE_MIN_CQE_CNT 16 -#define HNS_ROCE_RESERVED_SGE 1 - #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0618ced45bf8..38a48ab3e1d0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -629,7 +629,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - if (unlikely(wr->num_sge >= hr_qp->rq.max_gs)) { + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; @@ -649,7 +649,6 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, if (wr->num_sge < hr_qp->rq.max_gs) { dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg->addr = 0; - dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); } /* rq support inline data */ @@ -783,8 +782,8 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } if (wr->num_sge < srq->max_gs) { - dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); - dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg[i].len = 0; + dseg[i].lkey = cpu_to_le32(0x100); dseg[i].addr = 0; } @@ -3053,6 +3052,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, IB_WC_RETRY_EXC_ERR }, { HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR, IB_WC_RNR_RETRY_EXC_ERR }, { HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR, IB_WC_REM_ABORT_ERR }, + { HNS_ROCE_CQE_V2_GENERAL_ERR, IB_WC_GENERAL_ERR} }; u32 cqe_status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, @@ -3075,6 +3075,14 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, sizeof(*cqe), false); /* + * For hns ROCEE, GENERAL_ERR is an error type that is not defined in + * the standard protocol, the driver must ignore it and needn't to set + * the QP to an error state. + */ + if (cqe_status == HNS_ROCE_CQE_V2_GENERAL_ERR) + return; + + /* * Hip08 hardware cannot flush the WQEs in SQ/RQ if the QP state gets * into errored mode. Hence, as a workaround to this hardware * limitation, driver needs to assist in flushing. But the flushing @@ -4301,7 +4309,9 @@ static bool check_qp_state(enum ib_qp_state cur_state, [IB_QPS_RTR] = { [IB_QPS_RESET] = true, [IB_QPS_RTS] = true, [IB_QPS_ERR] = true }, - [IB_QPS_RTS] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true }, + [IB_QPS_RTS] = { [IB_QPS_RESET] = true, + [IB_QPS_RTS] = true, + [IB_QPS_ERR] = true }, [IB_QPS_SQD] = {}, [IB_QPS_SQE] = {}, [IB_QPS_ERR] = { [IB_QPS_RESET] = true, [IB_QPS_ERR] = true } @@ -5087,7 +5097,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) attr->srq_limit = limit_wl; attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs - HNS_ROCE_RESERVED_SGE; + attr->max_sge = srq->max_gs; out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index e176b0aaa4ac..4f840997c6c7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -92,9 +92,7 @@ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 -#define HNS_ROCE_INVALID_LKEY 0x0 -#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 - +#define HNS_ROCE_INVALID_LKEY 0x100 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 @@ -230,6 +228,7 @@ enum { HNS_ROCE_CQE_V2_TRANSPORT_RETRY_EXC_ERR = 0x15, HNS_ROCE_CQE_V2_RNR_RETRY_EXC_ERR = 0x16, HNS_ROCE_CQE_V2_REMOTE_ABORT_ERR = 0x22, + HNS_ROCE_CQE_V2_GENERAL_ERR = 0x23, HNS_ROCE_V2_CQE_STATUS_MASK = 0xff, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a0a47bd66975..4edea397b6b8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -386,8 +386,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return -EINVAL; } - hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + - HNS_ROCE_RESERVED_SGE); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -402,7 +401,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = cnt; - cap->max_recv_sge = hr_qp->rq.max_gs - HNS_ROCE_RESERVED_SGE; + cap->max_recv_sge = hr_qp->rq.max_gs; return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index f40a000e94ee..b9e2dbd372b6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -297,7 +297,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, spin_lock_init(&srq->lock); srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); - srq->max_gs = init_attr->attr.max_sge + HNS_ROCE_RESERVED_SGE; + srq->max_gs = init_attr->attr.max_sge; if (udata) { ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 7e0b205c05eb..d7c78f841d2f 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -439,7 +439,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; mr->ibmr.length = length; - mr->ibmr.iova = virt_addr; mr->ibmr.page_size = 1U << shift; return &mr->ibmr; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index fdf90ecb2699..460292179b32 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -235,6 +235,7 @@ struct qedr_ucontext { u32 dpi_size; u16 dpi; bool db_rec; + u8 edpm_mode; }; union db_prod32 { @@ -344,10 +345,10 @@ struct qedr_srq_hwq_info { u32 wqe_prod; u32 sge_prod; u32 wr_prod_cnt; - u32 wr_cons_cnt; + atomic_t wr_cons_cnt; u32 num_elems; - u32 *virt_prod_pair_addr; + struct rdma_srq_producers *virt_prod_pair_addr; dma_addr_t phy_prod_pair_addr; }; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 9b9e80266367..1a7f1f805be3 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -275,7 +275,8 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) DP_ERR(dev, "Problem copying data from user space\n"); return -EFAULT; } - + ctx->edpm_mode = !!(ureq.context_flags & + QEDR_ALLOC_UCTX_EDPM_MODE); ctx->db_rec = !!(ureq.context_flags & QEDR_ALLOC_UCTX_DB_REC); } @@ -316,11 +317,15 @@ int qedr_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) uresp.dpm_flags = QEDR_DPM_TYPE_IWARP_LEGACY; else uresp.dpm_flags = QEDR_DPM_TYPE_ROCE_ENHANCED | - QEDR_DPM_TYPE_ROCE_LEGACY; + QEDR_DPM_TYPE_ROCE_LEGACY | + QEDR_DPM_TYPE_ROCE_EDPM_MODE; - uresp.dpm_flags |= QEDR_DPM_SIZES_SET; - uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; - uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + if (ureq.context_flags & QEDR_SUPPORT_DPM_SIZES) { + uresp.dpm_flags |= QEDR_DPM_SIZES_SET; + uresp.ldpm_limit_size = QEDR_LDPM_MAX_SIZE; + uresp.edpm_trans_size = QEDR_EDPM_TRANS_SIZE; + uresp.edpm_limit_size = QEDR_EDPM_MAX_SIZE; + } uresp.wids_enabled = 1; uresp.wid_count = oparams.wid_count; @@ -1750,7 +1755,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, struct qed_rdma_create_qp_out_params out_params; struct qedr_pd *pd = get_qedr_pd(ibpd); struct qedr_create_qp_uresp uresp; - struct qedr_ucontext *ctx = NULL; + struct qedr_ucontext *ctx = pd ? pd->uctx : NULL; struct qedr_create_qp_ureq ureq; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); int rc = -EINVAL; @@ -1788,6 +1793,9 @@ static int qedr_create_user_qp(struct qedr_dev *dev, in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; } + if (ctx) + SET_FIELD(in_params.flags, QED_ROCE_EDPM_MODE, ctx->edpm_mode); + qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, &in_params, &out_params); @@ -3686,7 +3694,7 @@ static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq) * count and consumer count and subtract it from max * work request supported so that we get elements left. */ - used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt; + used = hw_srq->wr_prod_cnt - (u32)atomic_read(&hw_srq->wr_cons_cnt); return hw_srq->max_wr - used; } @@ -3701,7 +3709,6 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, unsigned long flags; int status = 0; u32 num_sge; - u32 offset; spin_lock_irqsave(&srq->lock, flags); @@ -3714,7 +3721,8 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (!qedr_srq_elem_left(hw_srq) || wr->num_sge > srq->hw_srq.max_sges) { DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n", - hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt, + hw_srq->wr_prod_cnt, + atomic_read(&hw_srq->wr_cons_cnt), wr->num_sge, srq->hw_srq.max_sges); status = -ENOMEM; *bad_wr = wr; @@ -3748,22 +3756,20 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, hw_srq->sge_prod++; } - /* Flush WQE and SGE information before + /* Update WQE and SGE information before * updating producer. */ - wmb(); + dma_wmb(); /* SRQ producer is 8 bytes. Need to update SGE producer index * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - *srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod; - offset = offsetof(struct rdma_srq_producers, wqe_prod); - *((u8 *)srq->hw_srq.virt_prod_pair_addr + offset) = - hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + /* Make sure sge producer is updated first */ + dma_wmb(); + srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; - /* Flush producer after updating it. */ - wmb(); wr = wr->next; } @@ -4182,7 +4188,7 @@ static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp, } else { __process_resp_one(dev, qp, cq, wc, resp, wr_id); } - srq->hw_srq.wr_cons_cnt++; + atomic_inc(&srq->hw_srq.wr_cons_cnt); return 1; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 831ad578a7b2..46e111c218fd 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -330,10 +330,14 @@ err1: static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { + struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); const struct ib_gid_attr *gid_attr; union ib_gid dgid; union ib_gid *pdgid; + if (pkt->mask & RXE_LOOPBACK_MASK) + return 0; + if (skb->protocol == htons(ETH_P_IP)) { ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, (struct in6_addr *)&dgid); @@ -366,7 +370,7 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) goto drop; - if (unlikely(rxe_match_dgid(rxe, skb) < 0)) { + if (rxe_match_dgid(rxe, skb) < 0) { pr_warn_ratelimited("failed matching dgid\n"); goto drop; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index b8a22af724e8..84fec5fd798d 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -684,6 +684,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, unsigned int mask; unsigned int length = 0; int i; + struct ib_send_wr *next; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); @@ -700,6 +701,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, break; } + next = wr->next; + length = 0; for (i = 0; i < wr->num_sge; i++) length += wr->sg_list[i].length; @@ -710,7 +713,7 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, *bad_wr = wr; break; } - wr = wr->next; + wr = next; } rxe_run_task(&qp->req.task, 1); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 9a3379c49541..9ce6a36fe48e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -515,7 +515,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev); int ipoib_ib_dev_open_default(struct net_device *dev); int ipoib_ib_dev_open(struct net_device *dev); -int ipoib_ib_dev_stop(struct net_device *dev); +void ipoib_ib_dev_stop(struct net_device *dev); void ipoib_ib_dev_up(struct net_device *dev); void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index da3c5315bbb5..494f413dc3c6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -670,13 +670,12 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, return rc; } -static void __ipoib_reap_ah(struct net_device *dev) +static void ipoib_reap_dead_ahs(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_ah *ah, *tah; unsigned long flags; - netif_tx_lock_bh(dev); + netif_tx_lock_bh(priv->dev); spin_lock_irqsave(&priv->lock, flags); list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list) @@ -687,37 +686,37 @@ static void __ipoib_reap_ah(struct net_device *dev) } spin_unlock_irqrestore(&priv->lock, flags); - netif_tx_unlock_bh(dev); + netif_tx_unlock_bh(priv->dev); } void ipoib_reap_ah(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, ah_reap_task.work); - struct net_device *dev = priv->dev; - __ipoib_reap_ah(dev); + ipoib_reap_dead_ahs(priv); if (!test_bit(IPOIB_STOP_REAPER, &priv->flags)) queue_delayed_work(priv->wq, &priv->ah_reap_task, round_jiffies_relative(HZ)); } -static void ipoib_flush_ah(struct net_device *dev) +static void ipoib_start_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - cancel_delayed_work(&priv->ah_reap_task); - flush_workqueue(priv->wq); - ipoib_reap_ah(&priv->ah_reap_task.work); + clear_bit(IPOIB_STOP_REAPER, &priv->flags); + queue_delayed_work(priv->wq, &priv->ah_reap_task, + round_jiffies_relative(HZ)); } -static void ipoib_stop_ah(struct net_device *dev) +static void ipoib_stop_ah_reaper(struct ipoib_dev_priv *priv) { - struct ipoib_dev_priv *priv = ipoib_priv(dev); - set_bit(IPOIB_STOP_REAPER, &priv->flags); - ipoib_flush_ah(dev); + cancel_delayed_work(&priv->ah_reap_task); + /* + * After ipoib_stop_ah_reaper() we always go through + * ipoib_reap_dead_ahs() which ensures the work is really stopped and + * does a final flush out of the dead_ah's list + */ } static int recvs_pending(struct net_device *dev) @@ -846,18 +845,6 @@ timeout: return 0; } -int ipoib_ib_dev_stop(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - priv->rn_ops->ndo_stop(dev); - - clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_flush_ah(dev); - - return 0; -} - int ipoib_ib_dev_open_default(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -901,10 +888,7 @@ int ipoib_ib_dev_open(struct net_device *dev) return -1; } - clear_bit(IPOIB_STOP_REAPER, &priv->flags); - queue_delayed_work(priv->wq, &priv->ah_reap_task, - round_jiffies_relative(HZ)); - + ipoib_start_ah_reaper(priv); if (priv->rn_ops->ndo_open(dev)) { pr_warn("%s: Failed to open dev\n", dev->name); goto dev_stop; @@ -915,13 +899,20 @@ int ipoib_ib_dev_open(struct net_device *dev) return 0; dev_stop: - set_bit(IPOIB_STOP_REAPER, &priv->flags); - cancel_delayed_work(&priv->ah_reap_task); - set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); - ipoib_ib_dev_stop(dev); + ipoib_stop_ah_reaper(priv); return -1; } +void ipoib_ib_dev_stop(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + priv->rn_ops->ndo_stop(dev); + + clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags); + ipoib_stop_ah_reaper(priv); +} + void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -1232,7 +1223,7 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_mcast_dev_flush(dev); if (oper_up) set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); - ipoib_flush_ah(dev); + ipoib_reap_dead_ahs(priv); } if (level >= IPOIB_FLUSH_NORMAL) @@ -1307,7 +1298,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) * the neighbor garbage collection is stopped and reaped. * That should all be done now, so make a final ah flush. */ - ipoib_stop_ah(dev); + ipoib_reap_dead_ahs(priv); clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3cfb682b91b0..ef60e8e4ae67 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1976,6 +1976,8 @@ static void ipoib_ndo_uninit(struct net_device *dev) /* no more works over the priv->wq */ if (priv->wq) { + /* See ipoib_mcast_carrier_on_task() */ + WARN_ON(test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)); flush_workqueue(priv->wq); destroy_workqueue(priv->wq); priv->wq = NULL; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 564388a85603..776e89231c52 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -12,6 +12,7 @@ #include <linux/module.h> #include <linux/rculist.h> +#include <linux/random.h> #include "rtrs-clt.h" #include "rtrs-log.h" @@ -23,6 +24,12 @@ * leads to "false positives" failed reconnect attempts */ #define RTRS_RECONNECT_BACKOFF 1000 +/* + * Wait for additional random time between 0 and 8 seconds + * before starting to reconnect to avoid clients reconnecting + * all at once in case of a major network outage + */ +#define RTRS_RECONNECT_SEED 8 MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -306,7 +313,8 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) */ delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms)); + msecs_to_jiffies(delay_ms + + prandom_u32() % RTRS_RECONNECT_SEED)); } else { /* * Error can happen just on establishing new connection, @@ -2503,7 +2511,9 @@ reconnect_again: sess->stats->reconnects.fail_cnt++; delay_ms = clt->reconnect_delay_sec * 1000; queue_delayed_work(rtrs_wq, &sess->reconnect_dwork, - msecs_to_jiffies(delay_ms)); + msecs_to_jiffies(delay_ms + + prandom_u32() % + RTRS_RECONNECT_SEED)); } } @@ -2972,7 +2982,7 @@ static int __init rtrs_client_init(void) pr_err("Failed to create rtrs-client dev class\n"); return PTR_ERR(rtrs_clt_dev_class); } - rtrs_wq = alloc_workqueue("rtrs_client_wq", WQ_MEM_RECLAIM, 0); + rtrs_wq = alloc_workqueue("rtrs_client_wq", 0, 0); if (!rtrs_wq) { class_destroy(rtrs_clt_dev_class); return -ENOMEM; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 0d9241f5d9e6..a219bd1bdbc2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -2150,7 +2150,7 @@ static int __init rtrs_server_init(void) err = PTR_ERR(rtrs_dev_class); goto out_chunk_pool; } - rtrs_wq = alloc_workqueue("rtrs_server_wq", WQ_MEM_RECLAIM, 0); + rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); if (!rtrs_wq) { err = -ENOMEM; goto out_dev_class; |